* Fix PR43432, vectorize backwards stepping loads
@ 2010-09-07 13:11 Michael Matz
2010-09-08 7:51 ` Ira Rosen
0 siblings, 1 reply; 11+ messages in thread
From: Michael Matz @ 2010-09-07 13:11 UTC (permalink / raw)
To: gcc-patches
Hi,
this implements handling of consecutive loads whose indices run backwards.
I'm handling the easy cases only, namely when no explicit realignment
instructions need to be generated and when we need to emit only one load
per original load (i.e. no multiple type width in the loop).
This all requires that we be able to reverse the elements of a vector,
hence two new helper functions for that are added.
Reversed stores aren't handled but should be reasonably easy to extend.
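To illustrate, here is a rough, hand-written sketch of what the vectorized
code conceptually does for the loop in the new testcase (with src1 already
adjusted to point at its last element, as the testcase does with
src1 += len-1). It uses GCC's generic vector extension with the width
hardcoded to four floats, glosses over alignment, and subscripts vector
elements (which needs a newer GCC than this patch targets); the patch of
course generates the equivalent inside the vectorizer rather than in source:

  typedef float v4sf __attribute__ ((vector_size (16)));

  void
  fmul_reverse_sketch (float *dst, const float *src0,
                       const float *src1, int n)
  {
    int i = 0;
    /* Vectorized loop; the IV into src1 decreases.  */
    for (; i + 4 <= n; i += 4)
      {
        v4sf a = *(const v4sf *) &src0[i];
        /* The load starts VF-1 = 3 elements below &src1[-i]; this is
           the offset = -TYPE_VECTOR_SUBPARTS (vectype) + 1 adjustment.  */
        v4sf b = *(const v4sf *) &src1[-i - 3];
        /* Reverse the elements; the patch instead emits a
           builtin_vec_perm call with a mask built by
           perm_mask_for_reverse.  */
        v4sf br = { b[3], b[2], b[1], b[0] };
        *(v4sf *) &dst[i] = a * br;
      }
    /* Scalar epilogue for the remaining elements.  */
    for (; i < n; i++)
      dst[i] = src0[i] * src1[-i];
  }

The two new ingredients are exactly the decreasing IV with its -VF+1 offset
and the element reversal after each load.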
Regstrapped on x86_64-linux. I've had to add x86_64-*-* to the
target-supports.exp/vect_perm recognition, which in turn makes
vect/slp-perm-8.c and vect/slp-perm-9.c fail. That's most probably
because while x86_64 supports permutation for 4- and 8-byte elements,
it doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm
going to investigate this, but am seeking feedback on the patch itself
already now.
Ciao,
Michael.
--
PR tree-optimization/43432
* tree-vect-data-refs.c (vect_analyze_data_ref_access):
Accept backwards consecutive accesses.
(vect_create_data_ref_ptr): If step is negative generate
decreasing IVs.
* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
(perm_mask_for_reverse, reverse_vec_elements): New functions.
(vectorizable_load): Handle loads with negative steps when easily
possible.
testsuite/
PR tree-optimization/43432
* gcc.dg/vect/pr43432.c: New test.
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c (revision 163773)
+++ tree-vect-data-refs.c (working copy)
@@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat
}
/* Consecutive? */
- if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+ if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+ || (dr_step < 0
+ && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
{
/* Mark that it is not interleaving. */
DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s
tree vptr;
gimple_stmt_iterator incr_gsi;
bool insert_after;
+ bool negative;
tree indx_before_incr, indx_after_incr;
gimple incr;
tree step;
@@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s
*inv_p = true;
else
*inv_p = false;
+ negative = tree_int_cst_compare (step, size_zero_node) < 0;
/* Create an expression for the first address accessed by this load
in LOOP. */
@@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s
LOOP is zero. In this case the step here is also zero. */
if (*inv_p)
step = size_zero_node;
+ else if (negative)
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c (revision 163773)
+++ tree-vect-stmts.c (working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step for store.");
+ return false;
+ }
+
if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
{
strided_store = true;
@@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_
return true;
}
+/* Given a vector type VECTYPE, returns a builtin DECL to be used
+ for vector permutation, and stores a mask into *MASK that implements
+ reversal of the vector elements. If that is impossible to do,
+ returns NULL (and *MASK is unchanged). */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+ tree builtin_decl;
+ tree mask_element_type, mask_type;
+ tree mask_vec = NULL;
+ int i;
+ int nunits;
+ if (!targetm.vectorize.builtin_vec_perm)
+ return NULL;
+
+ builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+ &mask_element_type);
+ if (!builtin_decl || !mask_element_type)
+ return NULL;
+
+ mask_type = get_vectype_for_scalar_type (mask_element_type);
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+ return NULL;
+
+ for (i = 0; i < nunits; i++)
+ mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+ mask_vec = build_vector (mask_type, mask_vec);
+
+ if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+ return NULL;
+ if (mask)
+ *mask = mask_vec;
+ return builtin_decl;
+}
+
+/* Given a vector variable X that was generated for the scalar LHS of
+ STMT, generate instructions to reverse the vector elements of X,
+ insert them at *GSI and return the permuted vector variable. */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+ tree vectype = TREE_TYPE (x);
+ tree mask_vec, builtin_decl;
+ tree perm_dest, data_ref;
+ gimple perm_stmt;
+
+ builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+ perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+ /* Generate the permute statement. */
+ perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+ data_ref = make_ssa_name (perm_dest, perm_stmt);
+ gimple_call_set_lhs (perm_stmt, data_ref);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+ return data_ref;
+}
+
/* vectorizable_load.
Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s
gimple first_stmt;
tree scalar_type;
bool inv_p;
+ bool negative;
bool compute_in_loop = false;
struct loop *at_loop;
int vec_num;
@@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+ if (negative && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types with negative step.");
+ return false;
+ }
+
scalar_type = TREE_TYPE (DR_REF (dr));
mode = TYPE_MODE (vectype);
@@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s
return false;
}
+ if (negative)
+ {
+ gcc_assert (!strided_load);
+ alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+ if (alignment_support_scheme != dr_aligned
+ && alignment_support_scheme != dr_unaligned_supported)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step but alignment required.");
+ return false;
+ }
+ if (!perm_mask_for_reverse (vectype, NULL))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step and reversing not supported.");
+ return false;
+ }
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s
else
at_loop = loop;
+ if (negative)
+ offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
@@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s
gcc_unreachable (); /* FORNOW. */
}
+ if (negative)
+ {
+ new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
+ }
+
/* Collect vector loads and later create their permutation in
vect_transform_strided_load (). */
if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c (revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+ int i;
+ src1 += len-1;
+ for(i=0; i<len; i++)
+ dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-07 13:11 Fix PR43432, vectorize backwards stepping loads Michael Matz
@ 2010-09-08 7:51 ` Ira Rosen
2010-09-09 15:01 ` Michael Matz
0 siblings, 1 reply; 11+ messages in thread
From: Ira Rosen @ 2010-09-08 7:51 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches
gcc-patches-owner@gcc.gnu.org wrote on 07/09/2010 03:58:56 PM:
>
> Hi,
>
> this implements handling of consecutive loads whose indices run backwards.
> I'm handling the easy cases only, namely when no explicit realignment
> instructions need to be generated and when we need to emit only one load
> per original load (i.e. no multiple type width in the loop).
>
> This all requires that we be able to reverse the elements of a vector,
> hence two new helper functions for that are added.
>
> Reversed stores aren't handled but should be reasonably easy to extend.
>
> Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
> target-supports.exp/vect_perm recognition which in turn makes
> vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
> because while x86_64 supports permutation for 4 and 8-sized elements it
> doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
> to investigate this but seek feedback on the patch itself already now.
>
Looks good to me.
Ira
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-08 7:51 ` Ira Rosen
@ 2010-09-09 15:01 ` Michael Matz
2010-09-09 15:08 ` Paolo Carlini
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Michael Matz @ 2010-09-09 15:01 UTC (permalink / raw)
To: gcc-patches; +Cc: Ira Rosen
Hi,
On Wed, 8 Sep 2010, Ira Rosen wrote:
> > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
> > target-supports.exp/vect_perm recognition which in turn makes
> > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
> > because while x86_64 supports permutation for 4 and 8-sized elements it
> > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
> > to investigate this but seek feedback on the patch itself already now.
> >
>
> Looks good to me.
Okay, so here's the complete patch. Functionality is unchanged, but I
extended the testsuite to have vect_perm_byte and vect_perm_short
predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
enable vect_perm on x86_64. With this one I get no regressions on
x86_64-linux (all default languages). Okay for trunk?
Ciao,
Michael.
--
PR tree-optimization/43432
* tree-vect-data-refs.c (vect_analyze_data_ref_access):
Accept backwards consecutive accesses.
(vect_create_data_ref_ptr): If step is negative generate
decreasing IVs.
* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
(perm_mask_for_reverse, reverse_vec_elements): New functions.
(vectorizable_load): Handle loads with negative steps when easily
possible.
testsuite/
PR tree-optimization/43432
* lib/target-supports.exp (check_effective_target_vect_perm_byte,
check_effective_target_vect_perm_short): New predicates.
(check_effective_target_vect_perm): Include x86_64.
* gcc.dg/vect/pr43432.c: New test.
* gcc.dg/vect/vect-114.c: Adjust.
* gcc.dg/vect/vect-15.c: Ditto.
* gcc.dg/vect/slp-perm-8.c: Use new predicate.
* gcc.dg/vect/slp-perm-9.c: Ditto.
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c (revision 163997)
+++ tree-vect-data-refs.c (working copy)
@@ -2282,7 +2282,9 @@ vect_analyze_data_ref_access (struct dat
}
/* Consecutive? */
- if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+ if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+ || (dr_step < 0
+ && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
{
/* Mark that it is not interleaving. */
DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2954,6 +2956,7 @@ vect_create_data_ref_ptr (gimple stmt, s
tree vptr;
gimple_stmt_iterator incr_gsi;
bool insert_after;
+ bool negative;
tree indx_before_incr, indx_after_incr;
gimple incr;
tree step;
@@ -2986,6 +2989,7 @@ vect_create_data_ref_ptr (gimple stmt, s
*inv_p = true;
else
*inv_p = false;
+ negative = tree_int_cst_compare (step, size_zero_node) < 0;
/* Create an expression for the first address accessed by this load
in LOOP. */
@@ -3145,6 +3149,8 @@ vect_create_data_ref_ptr (gimple stmt, s
LOOP is zero. In this case the step here is also zero. */
if (*inv_p)
step = size_zero_node;
+ else if (negative)
+ step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
standard_iv_increment_position (loop, &incr_gsi, &insert_after);
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c (revision 163999)
+++ tree-vect-stmts.c (working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step for store.");
+ return false;
+ }
+
if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
{
strided_store = true;
@@ -3412,6 +3419,68 @@ vectorizable_store (gimple stmt, gimple_
return true;
}
+/* Given a vector type VECTYPE, returns a builtin DECL to be used
+ for vector permutation, and stores a mask into *MASK that implements
+ reversal of the vector elements. If that is impossible to do,
+ returns NULL (and *MASK is unchanged). */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+ tree builtin_decl;
+ tree mask_element_type, mask_type;
+ tree mask_vec = NULL;
+ int i;
+ int nunits;
+ if (!targetm.vectorize.builtin_vec_perm)
+ return NULL;
+
+ builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+ &mask_element_type);
+ if (!builtin_decl || !mask_element_type)
+ return NULL;
+
+ mask_type = get_vectype_for_scalar_type (mask_element_type);
+ nunits = TYPE_VECTOR_SUBPARTS (vectype);
+ if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+ return NULL;
+
+ for (i = 0; i < nunits; i++)
+ mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+ mask_vec = build_vector (mask_type, mask_vec);
+
+ if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+ return NULL;
+ if (mask)
+ *mask = mask_vec;
+ return builtin_decl;
+}
+
+/* Given a vector variable X that was generated for the scalar LHS of
+ STMT, generate instructions to reverse the vector elements of X,
+ insert them at *GSI and return the permuted vector variable. */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+ tree vectype = TREE_TYPE (x);
+ tree mask_vec, builtin_decl;
+ tree perm_dest, data_ref;
+ gimple perm_stmt;
+
+ builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+ perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+ /* Generate the permute statement. */
+ perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+ data_ref = make_ssa_name (perm_dest, perm_stmt);
+ gimple_call_set_lhs (perm_stmt, data_ref);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+ return data_ref;
+}
+
/* vectorizable_load.
Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3454,6 +3523,7 @@ vectorizable_load (gimple stmt, gimple_s
gimple first_stmt;
tree scalar_type;
bool inv_p;
+ bool negative;
bool compute_in_loop = false;
struct loop *at_loop;
int vec_num;
@@ -3516,6 +3586,14 @@ vectorizable_load (gimple stmt, gimple_s
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
+ negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+ if (negative && ncopies > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "multiple types with negative step.");
+ return false;
+ }
+
scalar_type = TREE_TYPE (DR_REF (dr));
mode = TYPE_MODE (vectype);
@@ -3550,6 +3628,25 @@ vectorizable_load (gimple stmt, gimple_s
return false;
}
+ if (negative)
+ {
+ gcc_assert (!strided_load);
+ alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+ if (alignment_support_scheme != dr_aligned
+ && alignment_support_scheme != dr_unaligned_supported)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step but alignment required.");
+ return false;
+ }
+ if (!perm_mask_for_reverse (vectype, NULL))
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "negative step and reversing not supported.");
+ return false;
+ }
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3724,6 +3821,9 @@ vectorizable_load (gimple stmt, gimple_s
else
at_loop = loop;
+ if (negative)
+ offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++)
{
@@ -3912,6 +4012,12 @@ vectorizable_load (gimple stmt, gimple_s
gcc_unreachable (); /* FORNOW. */
}
+ if (negative)
+ {
+ new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+ new_stmt = SSA_NAME_DEF_STMT (new_temp);
+ }
+
/* Collect vector loads and later create their permutation in
vect_transform_strided_load (). */
if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/vect-114.c
===================================================================
--- testsuite/gcc.dg/vect/vect-114.c (revision 163997)
+++ testsuite/gcc.dg/vect/vect-114.c (working copy)
@@ -34,6 +34,7 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_perm } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-perm-8.c
===================================================================
--- testsuite/gcc.dg/vect/slp-perm-8.c (revision 163997)
+++ testsuite/gcc.dg/vect/slp-perm-8.c (working copy)
@@ -53,7 +53,7 @@ int main (int argc, const char* argv[])
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm_byte } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c (revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+ int i;
+ src1 += len-1;
+ for(i=0; i<len; i++)
+ dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/slp-perm-9.c
===================================================================
--- testsuite/gcc.dg/vect/slp-perm-9.c (revision 163997)
+++ testsuite/gcc.dg/vect/slp-perm-9.c (working copy)
@@ -54,7 +54,7 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
-/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm } } } */
+/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm_short } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/gcc.dg/vect/vect-15.c
===================================================================
--- testsuite/gcc.dg/vect/vect-15.c (revision 163997)
+++ testsuite/gcc.dg/vect/vect-15.c (working copy)
@@ -35,5 +35,5 @@ int main (void)
return main1 ();
}
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Index: testsuite/lib/target-supports.exp
===================================================================
--- testsuite/lib/target-supports.exp (revision 163997)
+++ testsuite/lib/target-supports.exp (working copy)
@@ -2366,7 +2366,8 @@ proc check_effective_target_vect_perm {
} else {
set et_vect_perm_saved 0
if { [istarget powerpc*-*-*]
- || [istarget spu-*-*] } {
+ || [istarget spu-*-*]
+ || [istarget x86_64-*-*] } {
set et_vect_perm_saved 1
}
}
@@ -2374,6 +2375,48 @@ proc check_effective_target_vect_perm {
return $et_vect_perm_saved
}
+# Return 1 if the target plus current options supports vector permutation
+# on byte-sized elements, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_perm_byte { } {
+ global et_vect_perm_byte_saved
+
+ if [info exists et_vect_perm_byte_saved] {
+ verbose "check_effective_target_vect_perm_byte: using cached result" 2
+ } else {
+ set et_vect_perm_byte_saved 0
+ if { [istarget powerpc*-*-*]
+ || [istarget spu-*-*] } {
+ set et_vect_perm_byte_saved 1
+ }
+ }
+ verbose "check_effective_target_vect_perm_byte: returning $et_vect_perm_byte_saved" 2
+ return $et_vect_perm_byte_saved
+}
+
+# Return 1 if the target plus current options supports vector permutation
+# on short-sized elements, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_perm_short { } {
+ global et_vect_perm_short_saved
+
+ if [info exists et_vect_perm_short_saved] {
+ verbose "check_effective_target_vect_perm_short: using cached result" 2
+ } else {
+ set et_vect_perm_short_saved 0
+ if { [istarget powerpc*-*-*]
+ || [istarget spu-*-*] } {
+ set et_vect_perm_short_saved 1
+ }
+ }
+ verbose "check_effective_target_vect_perm_short: returning $et_vect_perm_short_saved" 2
+ return $et_vect_perm_short_saved
+}
+
# Return 1 if the target plus current options supports a vector
# widening summation of *short* args into *int* result, 0 otherwise.
#
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-09 15:01 ` Michael Matz
@ 2010-09-09 15:08 ` Paolo Carlini
2010-09-09 15:22 ` Michael Matz
2010-09-12 11:23 ` Ira Rosen
2010-09-17 16:55 ` H.J. Lu
2 siblings, 1 reply; 11+ messages in thread
From: Paolo Carlini @ 2010-09-09 15:08 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches, Ira Rosen
... does this kind of work imply that we may be able to vectorize, in
some simple cases (but not the *so* simple ones, where we already
dispatch to memcpy & co), the *_backward C++ algorithms, like
std::copy_backward? Do you have any idea?
Paolo.
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-09 15:08 ` Paolo Carlini
@ 2010-09-09 15:22 ` Michael Matz
2010-09-09 16:39 ` Paolo Carlini
0 siblings, 1 reply; 11+ messages in thread
From: Michael Matz @ 2010-09-09 15:22 UTC (permalink / raw)
To: Paolo Carlini; +Cc: gcc-patches, Ira Rosen
Hi,
On Thu, 9 Sep 2010, Paolo Carlini wrote:
> ... does this kind of work imply that we may be able to vectorize in
> some simple cases (but not *so* simple, where we dispatch already to
> memcpy & co) the *_backward C++ algorithms, like std::copy_backward? Do
> you have any idea?
With the patch we can now vectorize loops where the loads are consecutive
but run backwards. E.g.

  for (i = 0; i < N; i++)
    ... = x[-i];

We cannot vectorize loops where the backward step is larger than the
element size (i.e. strided accesses, e.g. x[-2*i]). Neither can we
vectorize loops that involve negative steps but multiple vector element
counts (for instance, a loop mixing chars and ints with a vector size of
16 bytes would need to emit four vector int loads (to get the 16
elements) per vector char load (which gives 16 elements directly)).
Loops requiring complicated alignment patterns can't be vectorized
either (that doesn't happen on x86_64). And lastly, stores with negative
steps can't be vectorized. Except for support for the complicated
alignment patterns, all of the above should be reasonably easy to extend.
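For concreteness, here are example loops for those cases (sketches, not
from the testsuite):

  for (i = 0; i < N; i++) y[i] = x[-i];         /* handled: unit negative step */
  for (i = 0; i < N; i++) y[i] = x[-2*i];       /* not handled: strided backward */
  for (i = 0; i < N; i++) c[i] = (char) xi[-i]; /* not handled: the int load
                                                   needs several vector copies
                                                   when chars set the VF */
  for (i = 0; i < N; i++) x[-i] = y[i];         /* not handled: backward store */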
That is, if the *_backward algorithms exhibit loads from memory of this
type, and that was the only reason until now that a loop in question
wasn't vectorized, then those loops can be vectorized with the patch.
I'm not too familiar with the STL algorithms, but from a quick glance the
only *_backward things are move_backward, copy_backward and
merge_backward. Those seem to merely copy items around, not actually
do anything with them. But they also have the stores stepping
backwards, so the patch would need extending to support that.
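For reference, the core of such an algorithm looks roughly like this
simplified C version (a sketch, not the actual libstdc++ code):

  /* Simplified element-wise copy_backward.  */
  float *
  copy_backward_c (const float *first, const float *last, float *result)
  {
    while (last != first)
      *--result = *--last;
    return result;
  }

Here both the load and the store step backwards in lockstep, so on top of
this patch it would need the negative-step store support; the element
reversal done for the load and the one needed for the store would then
cancel each other out.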
Ciao,
Michael.
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-09 15:22 ` Michael Matz
@ 2010-09-09 16:39 ` Paolo Carlini
0 siblings, 0 replies; 11+ messages in thread
From: Paolo Carlini @ 2010-09-09 16:39 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches, Ira Rosen
Hi,
Thanks Michael for the clarifications.
> I'm not too familiar with the STL algorithms, but from a quick glance the
> only *_backward things are move_backward, copy_backward and
> merge_backward. Those just seem to merely copy items around, not actually
> doing something with them. But they do have also the stores stepping
> backwards, so the patch would need extending to support that.
>
Right, this is something I was missing: I'm pretty sure we have some
fairly straightforward patterns in some cases, but definitely backward
stores too. Unless you have, for example, a std::accumulate fed with
reverse_iterators?!? In fact *any* algorithm becomes backward with
reverse_iterators, but those aren't that common, I'm afraid...
Paolo.
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-09 15:01 ` Michael Matz
2010-09-09 15:08 ` Paolo Carlini
@ 2010-09-12 11:23 ` Ira Rosen
2010-09-17 16:55 ` H.J. Lu
2 siblings, 0 replies; 11+ messages in thread
From: Ira Rosen @ 2010-09-12 11:23 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches
Michael Matz <matz@suse.de> wrote on 09/09/2010 05:30:28 PM:
> Hi,
>
> On Wed, 8 Sep 2010, Ira Rosen wrote:
>
> > > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
> > > target-supports.exp/vect_perm recognition which in turn makes
> > > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
> > > because while x86_64 supports permutation for 4 and 8-sized elements it
> > > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
> > > to investigate this but seek feedback on the patch itself already now.
> > >
> >
> > Looks good to me.
>
> Okay, so here's the complete patch. Functionality is unchanged, but I
> extended the testsuite to have vect_perm_byte and vect_perm_short
> predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
> enable vect_perm on x86_64. With this one I get no regressions on
> x86_64-linux (all default languages). Okay for trunk?
Yes.
Thanks,
Ira
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-09 15:01 ` Michael Matz
2010-09-09 15:08 ` Paolo Carlini
2010-09-12 11:23 ` Ira Rosen
@ 2010-09-17 16:55 ` H.J. Lu
2010-09-18 16:59 ` H.J. Lu
2 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2010-09-17 16:55 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches, Ira Rosen
On Thu, Sep 9, 2010 at 7:30 AM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Wed, 8 Sep 2010, Ira Rosen wrote:
>
>> > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
>> > target-supports.exp/vect_perm recognition which in turn makes
>> > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
>> > because while x86_64 supports permutation for 4 and 8-sized elements it
>> > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
>> > to investigate this but seek feedback on the patch itself already now.
>> >
>>
>> Looks good to me.
>
> Okay, so here's the complete patch. Functionality is unchanged, but I
> extended the testsuite to have vect_perm_byte and vect_perm_short
> predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
> enable vect_perm on x86_64. With this one I get no regressions on
> x86_64-linux (all default languages). Okay for trunk?
>
>
> Ciao,
> Michael.
This may have caused:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45706
H.J.
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-17 16:55 ` H.J. Lu
@ 2010-09-18 16:59 ` H.J. Lu
2010-09-24 15:31 ` H.J. Lu
0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2010-09-18 16:59 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches, Ira Rosen
On Fri, Sep 17, 2010 at 9:00 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Thu, Sep 9, 2010 at 7:30 AM, Michael Matz <matz@suse.de> wrote:
>> Hi,
>>
>> On Wed, 8 Sep 2010, Ira Rosen wrote:
>>
>>> > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
>>> > target-supports.exp/vect_perm recognition which in turn makes
>>> > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
>>> > because while x86_64 supports permutation for 4 and 8-sized elements it
>>> > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
>>> > to investigate this but seek feedback on the patch itself already now.
>>> >
>>>
>>> Looks good to me.
>>
>> Okay, so here's the complete patch. Functionality is unchanged, but I
>> extended the testsuite to have vect_perm_byte and vect_perm_short
>> predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
>> enable vect_perm on x86_64. With this one I get no regressions on
>> x86_64-linux (all default languages). Okay for trunk?
>>
>>
>> Ciao,
>> Michael.
>
> This may have caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45706
>
This also caused:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45720
--
H.J.
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-18 16:59 ` H.J. Lu
@ 2010-09-24 15:31 ` H.J. Lu
2010-09-24 17:58 ` Ira Rosen
0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2010-09-24 15:31 UTC (permalink / raw)
To: Michael Matz; +Cc: gcc-patches, Ira Rosen
On Sat, Sep 18, 2010 at 8:57 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> On Fri, Sep 17, 2010 at 9:00 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> On Thu, Sep 9, 2010 at 7:30 AM, Michael Matz <matz@suse.de> wrote:
>>> Hi,
>>>
>>> On Wed, 8 Sep 2010, Ira Rosen wrote:
>>>
>>>> > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
>>>> > target-supports.exp/vect_perm recognition which in turn makes
>>>> > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
>>>> > because while x86_64 supports permutation for 4 and 8-sized elements it
>>>> > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
>>>> > to investigate this but seek feedback on the patch itself already now.
>>>> >
>>>>
>>>> Looks good to me.
>>>
>>> Okay, so here's the complete patch. Functionality is unchanged, but I
>>> extended the testsuite to have vect_perm_byte and vect_perm_short
>>> predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
>>> enable vect_perm on x86_64. With this one I get no regressions on
>>> x86_64-linux (all default languages). Okay for trunk?
>>>
>>>
>>> Ciao,
>>> Michael.
>>
>> This may have caused:
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45706
>>
>
> This also caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45720
It also caused:
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45764
The vectorizer has been pretty much broken for a week. Is anyone
working on it?
--
H.J.
* Re: Fix PR43432, vectorize backwards stepping loads
2010-09-24 15:31 ` H.J. Lu
@ 2010-09-24 17:58 ` Ira Rosen
0 siblings, 0 replies; 11+ messages in thread
From: Ira Rosen @ 2010-09-24 17:58 UTC (permalink / raw)
To: H.J. Lu; +Cc: gcc-patches, Michael Matz
"H.J. Lu" <hjl.tools@gmail.com> wrote on 24/09/2010 06:35:37 AM:
> From: "H.J. Lu" <hjl.tools@gmail.com>
> To: Michael Matz <matz@suse.de>
> Cc: gcc-patches@gcc.gnu.org, Ira Rosen/Haifa/IBM@IBMIL
> Date: 24/09/2010 06:35 AM
> Subject: Re: Fix PR43432, vectorize backwards stepping loads
>
> On Sat, Sep 18, 2010 at 8:57 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> > On Fri, Sep 17, 2010 at 9:00 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> >> On Thu, Sep 9, 2010 at 7:30 AM, Michael Matz <matz@suse.de> wrote:
> >>> Hi,
> >>>
> >>> On Wed, 8 Sep 2010, Ira Rosen wrote:
> >>>
> >>>> > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
> >>>> > target-supports.exp/vect_perm recognition which in turn makes
> >>>> > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most probably
> >>>> > because while x86_64 supports permutation for 4 and 8-sized elements it
> >>>> > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm going
> >>>> > to investigate this but seek feedback on the patch itself already now.
> >>>> >
> >>>>
> >>>> Looks good to me.
> >>>
> >>> Okay, so here's the complete patch. Functionality is unchanged, but I
> >>> extended the testsuite to have vect_perm_byte and vect_perm_short
> >>> predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
> >>> enable vect_perm on x86_64. With this one I get no regressions on
> >>> x86_64-linux (all default languages). Okay for trunk?
> >>>
> >>>
> >>> Ciao,
> >>> Michael.
> >>
> >> This may have caused:
> >>
> >> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45706
> >>
> >
> > This also caused:
> >
> > http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45720
>
>
> It also caused:
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=45764
>
> The vectorizer has been pretty much broken for a week. Is anyone
> working on it?
Unfortunately, I won't be able to help with that till October 3 - I am on
vacation.
Ira
>
>
>
> --
> H.J.
end of thread
Thread overview: 11+ messages
2010-09-07 13:11 Fix PR43432, vectorize backwards stepping loads Michael Matz
2010-09-08 7:51 ` Ira Rosen
2010-09-09 15:01 ` Michael Matz
2010-09-09 15:08 ` Paolo Carlini
2010-09-09 15:22 ` Michael Matz
2010-09-09 16:39 ` Paolo Carlini
2010-09-12 11:23 ` Ira Rosen
2010-09-17 16:55 ` H.J. Lu
2010-09-18 16:59 ` H.J. Lu
2010-09-24 15:31 ` H.J. Lu
2010-09-24 17:58 ` Ira Rosen