From: Ira Rosen <IRAR@il.ibm.com>
To: Michael Matz <matz@suse.de>
Cc: gcc-patches@gcc.gnu.org
Subject: Re: Fix PR43432, vectorize backwards stepping loads
Date: Sun, 12 Sep 2010 11:23:00 -0000 [thread overview]
Message-ID: <OF2DD94F39.DAFC3429-ONC225779C.0024FAB3-C225779C.00250CCC@il.ibm.com> (raw)
In-Reply-To: <Pine.LNX.4.64.1009091628110.29722@wotan.suse.de>
Michael Matz <matz@suse.de> wrote on 09/09/2010 05:30:28 PM:
> Hi,
>
> On Wed, 8 Sep 2010, Ira Rosen wrote:
>
> > > Regstrapped on x86_64-linux. I've had to add x86_64-*-* to
> > > target-supports.exp/vect_perm recognition which in turn makes
> > > vect/slp-perm-8.c and vect/slp-perm-9.c fail then. That's most
probably
> > > because while x86_64 supports permutation for 4 and 8-sized elements
it
> > > doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c). I'm
going
> > > to investigate this but seek feedback on the patch itself already
now.
> > >
> >
> > Looks good to me.
>
> Okay, so here's the complete patch. Functionality is unchanged, but I
> extended the testsuite to have vect_perm_byte and vect_perm_short
> predicates (using them in slp-perm-8.c and slp-perm-9.c), so that I can
> enable vect_perm on x86_64. With this one I get no regressions on
> x86_64-linux (all default languages). Okay for trunk?
Yes.
Thanks,
Ira
>
>
> Ciao,
> Michael.
> --
> PR tree-optimization/43432
> * tree-vect-data-refs.c (vect_analyze_data_ref_access):
> Accept backwards consecutive accesses.
> (vect_create_data_ref_ptr): If step is negative generate
> decreasing IVs.
> * tree-vect-stmts.c (vectorizable_store): Reject negative steps.
> (perm_mask_for_reverse, reverse_vec_elements): New functions.
> (vectorizable_load): Handle loads with negative steps when easily
> possible.
>
> testsuite/
> PR tree-optimization/43432
> * lib/target-supports.exp (check_effective_target_vect_perm_byte,
> check_effective_target_vect_perm_short): New predicates.
> (check_effective_target_vect_perm): Include x86_64.
> * gcc.dg/vect/pr43432.c: New test.
> * gcc.dg/vect/vect-114.c: Adjust.
> * gcc.dg/vect/vect-15.c: Ditto.
> * gcc.dg/vect/slp-perm-8.c: Use new predicate.
> * gcc.dg/vect/slp-perm-9.c: Ditto.
>
> Index: tree-vect-data-refs.c
> ===================================================================
> --- tree-vect-data-refs.c (revision 163997)
> +++ tree-vect-data-refs.c (working copy)
> @@ -2282,7 +2282,9 @@ vect_analyze_data_ref_access (struct dat
> }
>
> /* Consecutive? */
> - if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
> + if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
> + || (dr_step < 0
> + && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
> {
> /* Mark that it is not interleaving. */
> DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
> @@ -2954,6 +2956,7 @@ vect_create_data_ref_ptr (gimple stmt, s
> tree vptr;
> gimple_stmt_iterator incr_gsi;
> bool insert_after;
> + bool negative;
> tree indx_before_incr, indx_after_incr;
> gimple incr;
> tree step;
> @@ -2986,6 +2989,7 @@ vect_create_data_ref_ptr (gimple stmt, s
> *inv_p = true;
> else
> *inv_p = false;
> + negative = tree_int_cst_compare (step, size_zero_node) < 0;
>
> /* Create an expression for the first address accessed by this load
> in LOOP. */
> @@ -3145,6 +3149,8 @@ vect_create_data_ref_ptr (gimple stmt, s
> LOOP is zero. In this case the step here is also zero. */
> if (*inv_p)
> step = size_zero_node;
> + else if (negative)
> + step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
>
> standard_iv_increment_position (loop, &incr_gsi, &insert_after);
>
> Index: tree-vect-stmts.c
> ===================================================================
> --- tree-vect-stmts.c (revision 163999)
> +++ tree-vect-stmts.c (working copy)
> @@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
> if (!STMT_VINFO_DATA_REF (stmt_info))
> return false;
>
> + if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "negative step for store.");
> + return false;
> + }
> +
> if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
> {
> strided_store = true;
> @@ -3412,6 +3419,68 @@ vectorizable_store (gimple stmt, gimple_
> return true;
> }
>
> +/* Given a vector type VECTYPE returns a builtin DECL to be used
> + for vector permutation and stores a mask into *MASK that implements
> + reversal of the vector elements. If that is impossible to do
> + returns NULL (and *MASK is unchanged). */
> +
> +static tree
> +perm_mask_for_reverse (tree vectype, tree *mask)
> +{
> + tree builtin_decl;
> + tree mask_element_type, mask_type;
> + tree mask_vec = NULL;
> + int i;
> + int nunits;
> + if (!targetm.vectorize.builtin_vec_perm)
> + return NULL;
> +
> + builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
> +
&mask_element_type);
> + if (!builtin_decl || !mask_element_type)
> + return NULL;
> +
> + mask_type = get_vectype_for_scalar_type (mask_element_type);
> + nunits = TYPE_VECTOR_SUBPARTS (vectype);
> + if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS
(mask_type))
> + return NULL;
> +
> + for (i = 0; i < nunits; i++)
> + mask_vec = tree_cons (NULL, build_int_cst (mask_element_type,
> i), mask_vec);
> + mask_vec = build_vector (mask_type, mask_vec);
> +
> + if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
> + return NULL;
> + if (mask)
> + *mask = mask_vec;
> + return builtin_decl;
> +}
> +
> +/* Given a vector variable X, that was generated for the scalar LHS of
> + STMT, generate instructions to reverse the vector elements of X,
> + insert them a *GSI and return the permuted vector variable. */
> +
> +static tree
> +reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
> +{
> + tree vectype = TREE_TYPE (x);
> + tree mask_vec, builtin_decl;
> + tree perm_dest, data_ref;
> + gimple perm_stmt;
> +
> + builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
> +
> + perm_dest = vect_create_destination_var (gimple_assign_lhs
> (stmt), vectype);
> +
> + /* Generate the permute statement. */
> + perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
> + data_ref = make_ssa_name (perm_dest, perm_stmt);
> + gimple_call_set_lhs (perm_stmt, data_ref);
> + vect_finish_stmt_generation (stmt, perm_stmt, gsi);
> +
> + return data_ref;
> +}
> +
> /* vectorizable_load.
>
> Check if STMT reads a non scalar data-ref (array/pointer/structure)
that
> @@ -3454,6 +3523,7 @@ vectorizable_load (gimple stmt, gimple_s
> gimple first_stmt;
> tree scalar_type;
> bool inv_p;
> + bool negative;
> bool compute_in_loop = false;
> struct loop *at_loop;
> int vec_num;
> @@ -3516,6 +3586,14 @@ vectorizable_load (gimple stmt, gimple_s
> if (!STMT_VINFO_DATA_REF (stmt_info))
> return false;
>
> + negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
> + if (negative && ncopies > 1)
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "multiple types with negative step.");
> + return false;
> + }
> +
> scalar_type = TREE_TYPE (DR_REF (dr));
> mode = TYPE_MODE (vectype);
>
> @@ -3550,6 +3628,25 @@ vectorizable_load (gimple stmt, gimple_s
> return false;
> }
>
> + if (negative)
> + {
> + gcc_assert (!strided_load);
> + alignment_support_scheme = vect_supportable_dr_alignment (dr,
false);
> + if (alignment_support_scheme != dr_aligned
> + && alignment_support_scheme != dr_unaligned_supported)
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "negative step but alignment required.");
> + return false;
> + }
> + if (!perm_mask_for_reverse (vectype, NULL))
> + {
> + if (vect_print_dump_info (REPORT_DETAILS))
> + fprintf (vect_dump, "negative step and reversing not
supported.");
> + return false;
> + }
> + }
> +
> if (!vec_stmt) /* transformation not required. */
> {
> STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
> @@ -3724,6 +3821,9 @@ vectorizable_load (gimple stmt, gimple_s
> else
> at_loop = loop;
>
> + if (negative)
> + offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
> +
> prev_stmt_info = NULL;
> for (j = 0; j < ncopies; j++)
> {
> @@ -3912,6 +4012,12 @@ vectorizable_load (gimple stmt, gimple_s
> gcc_unreachable (); /* FORNOW. */
> }
>
> + if (negative)
> + {
> + new_temp = reverse_vec_elements (new_temp, stmt, gsi);
> + new_stmt = SSA_NAME_DEF_STMT (new_temp);
> + }
> +
> /* Collect vector loads and later create their permutation in
> vect_transform_strided_load (). */
> if (strided_load || slp_perm)
> Index: testsuite/gcc.dg/vect/vect-114.c
> ===================================================================
> --- testsuite/gcc.dg/vect/vect-114.c (revision 163997)
> +++ testsuite/gcc.dg/vect/vect-114.c (working copy)
> @@ -34,6 +34,7 @@ int main (void)
> return main1 ();
> }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } }
*/
> +/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect"
> { target { ! vect_perm } } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
> { target vect_perm } } } */
> /* { dg-final { cleanup-tree-dump "vect" } } */
>
> Index: testsuite/gcc.dg/vect/slp-perm-8.c
> ===================================================================
> --- testsuite/gcc.dg/vect/slp-perm-8.c (revision 163997)
> +++ testsuite/gcc.dg/vect/slp-perm-8.c (working copy)
> @@ -53,7 +53,7 @@ int main (int argc, const char* argv[])
> return 0;
> }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"
> { target vect_perm } } } */
> -/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP"
> 1 "vect" { target vect_perm } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"
> { target vect_perm_byte } } } */
> +/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP"
> 1 "vect" { target vect_perm_byte } } } */
> /* { dg-final { cleanup-tree-dump "vect" } } */
>
> Index: testsuite/gcc.dg/vect/pr43432.c
> ===================================================================
> --- testsuite/gcc.dg/vect/pr43432.c (revision 0)
> +++ testsuite/gcc.dg/vect/pr43432.c (revision 0)
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target vect_float } */
> +/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
> +
> +
> +void vector_fmul_reverse_c(float *dst, const float *src0, const float
*src1,
> +int len){
> + int i;
> + src1 += len-1;
> + for(i=0; i<len; i++)
> + dst[i] = src0[i] * src1[-i];
> +}
> +
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
> { target { vect_perm } } } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> Index: testsuite/gcc.dg/vect/slp-perm-9.c
> ===================================================================
> --- testsuite/gcc.dg/vect/slp-perm-9.c (revision 163997)
> +++ testsuite/gcc.dg/vect/slp-perm-9.c (working copy)
> @@ -54,7 +54,7 @@ int main (int argc, const char* argv[])
> }
>
> /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
*/
> -/* { dg-final { scan-tree-dump-times "permutation requires at least
> three vectors" 1 "vect" { target vect_perm } } } */
> +/* { dg-final { scan-tree-dump-times "permutation requires at least
> three vectors" 1 "vect" { target vect_perm_short } } } */
> /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP"
> 0 "vect" } } */
> /* { dg-final { cleanup-tree-dump "vect" } } */
>
> Index: testsuite/gcc.dg/vect/vect-15.c
> ===================================================================
> --- testsuite/gcc.dg/vect/vect-15.c (revision 163997)
> +++ testsuite/gcc.dg/vect/vect-15.c (working copy)
> @@ -35,5 +35,5 @@ int main (void)
> return main1 ();
> }
>
> -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
> { xfail *-*-* } } } */
> +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"
> { target vect_perm } } } */
> /* { dg-final { cleanup-tree-dump "vect" } } */
> Index: testsuite/lib/target-supports.exp
> ===================================================================
> --- testsuite/lib/target-supports.exp (revision 163997)
> +++ testsuite/lib/target-supports.exp (working copy)
> @@ -2366,7 +2366,8 @@ proc check_effective_target_vect_perm {
> } else {
> set et_vect_perm_saved 0
> if { [istarget powerpc*-*-*]
> - || [istarget spu-*-*] } {
> + || [istarget spu-*-*]
> + || [istarget x86_64-*-*] } {
> set et_vect_perm_saved 1
> }
> }
> @@ -2374,6 +2375,48 @@ proc check_effective_target_vect_perm {
> return $et_vect_perm_saved
> }
>
> +# Return 1 if the target plus current options supports vector
permutation
> +# on byte-sized elements, 0 otherwise.
> +#
> +# This won't change for different subtargets so cache the result.
> +
> +proc check_effective_target_vect_perm_byte { } {
> + global et_vect_perm_byte
> +
> + if [info exists et_vect_perm_byte_saved] {
> + verbose "check_effective_target_vect_perm_byte: using
> cached result" 2
> + } else {
> + set et_vect_perm_byte_saved 0
> + if { [istarget powerpc*-*-*]
> + || [istarget spu-*-*] } {
> + set et_vect_perm_byte_saved 1
> + }
> + }
> + verbose "check_effective_target_vect_perm_byte: returning
> $et_vect_perm_byte_saved" 2
> + return $et_vect_perm_byte_saved
> +}
> +
> +# Return 1 if the target plus current options supports vector
permutation
> +# on short-sized elements, 0 otherwise.
> +#
> +# This won't change for different subtargets so cache the result.
> +
> +proc check_effective_target_vect_perm_short { } {
> + global et_vect_perm_short
> +
> + if [info exists et_vect_perm_short_saved] {
> + verbose "check_effective_target_vect_perm_short: using
> cached result" 2
> + } else {
> + set et_vect_perm_short_saved 0
> + if { [istarget powerpc*-*-*]
> + || [istarget spu-*-*] } {
> + set et_vect_perm_short_saved 1
> + }
> + }
> + verbose "check_effective_target_vect_perm_short: returning
> $et_vect_perm_short_saved" 2
> + return $et_vect_perm_short_saved
> +}
> +
> # Return 1 if the target plus current options supports a vector
> # widening summation of *short* args into *int* result, 0 otherwise.
> #
next prev parent reply other threads:[~2010-09-12 6:44 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-09-07 13:11 Michael Matz
2010-09-08 7:51 ` Ira Rosen
2010-09-09 15:01 ` Michael Matz
2010-09-09 15:08 ` Paolo Carlini
2010-09-09 15:22 ` Michael Matz
2010-09-09 16:39 ` Paolo Carlini
2010-09-12 11:23 ` Ira Rosen [this message]
2010-09-17 16:55 ` H.J. Lu
2010-09-18 16:59 ` H.J. Lu
2010-09-24 15:31 ` H.J. Lu
2010-09-24 17:58 ` Ira Rosen
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=OF2DD94F39.DAFC3429-ONC225779C.0024FAB3-C225779C.00250CCC@il.ibm.com \
--to=irar@il.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=matz@suse.de \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).