From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] Remove last permutation restriction for SLP vectorizing
Date: Tue, 09 Jun 2015 14:22:00 -0000 [thread overview]
Message-ID: <alpine.LSU.2.11.1506091615180.30088@zhemvz.fhfr.qr> (raw)
This removes the last load permutation restriction in place on reductions. To
not regress gfortran.dg/vect/fast-math-pr37021.f90 the patch also implements
SLP permutations for strided group loads. With those two pieces
we can finally SLP vectorize the complex multiplication (which
in the gfortran.dg/vect/fast-math-pr37021.f90 testcase uses
variable stride). Code generated on x86_64 suffers from PR56766,
combine not being able to use sse3_addsubv2df3 because the
RTL doesn't use the expected vec_merge but
(vec_select (vec_concat )) instead.
Previously we vectorized this with unrolling and interleaving.
Bootstrap and regtest running on x86_64-unknown-linux-gnu.
Richard.
2015-06-09 Richard Biener <rguenther@suse.de>
* tree-vect-slp.c (vect_attempt_slp_rearrange_stmts): Split
out from ...
(vect_supported_load_permutation_p): ... here. Handle
supportable permutations in reductions.
* tree-vect-stmts.c (vectorizable_load): Handle SLP permutations
for vectorizing strided group loads.
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 224271)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_slp_rearrange_stmts (slp_tree node,
*** 1326,1331 ****
--- 1270,1336 ----
}
+ /* Attempt to reorder stmts in a reduction chain so that we don't
+ require any load permutation. Return true if that was possible,
+ otherwise return false. */
+
+ static bool
+ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
+ {
+ unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
+ unsigned int i, j;
+ sbitmap load_index;
+ unsigned int lidx;
+ slp_tree node, load;
+
+ /* Compare all the permutation sequences to the first one. We know
+ that at least one load is permuted. */
+ node = SLP_INSTANCE_LOADS (slp_instn)[0];
+ if (!node->load_permutation.exists ())
+ return false;
+ for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
+ {
+ if (!load->load_permutation.exists ())
+ return false;
+ FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
+ if (lidx != node->load_permutation[j])
+ return false;
+ }
+
+ /* Check that the loads in the first sequence are different and there
+ are no gaps between them. */
+ load_index = sbitmap_alloc (group_size);
+ bitmap_clear (load_index);
+ FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
+ {
+ if (bitmap_bit_p (load_index, lidx))
+ {
+ sbitmap_free (load_index);
+ return false;
+ }
+ bitmap_set_bit (load_index, lidx);
+ }
+ for (i = 0; i < group_size; i++)
+ if (!bitmap_bit_p (load_index, i))
+ {
+ sbitmap_free (load_index);
+ return false;
+ }
+ sbitmap_free (load_index);
+
+ /* This permutation is valid for reduction. Since the order of the
+ statements in the nodes is not important unless they are memory
+ accesses, we can rearrange the statements in all the nodes
+ according to the order of the loads. */
+ vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
+ node->load_permutation);
+
+ /* We are done, no actual permutations need to be generated. */
+ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
+ SLP_TREE_LOAD_PERMUTATION (node).release ();
+ return true;
+ }
+
/* Check if the required load permutations in the SLP instance
SLP_INSTN are supported. */
*************** vect_supported_load_permutation_p (slp_i
*** 1334,1340 ****
{
unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
unsigned int i, j, k, next;
- sbitmap load_index;
slp_tree node;
gimple stmt, load, next_load, first_load;
struct data_reference *dr;
--- 1339,1344 ----
*************** vect_supported_load_permutation_p (slp_i
*** 1369,1427 ****
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* Reduction (there are no data-refs in the root).
! In reduction chain the order of the loads is important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
! slp_tree load;
! unsigned int lidx;
!
! /* Compare all the permutation sequences to the first one. We know
! that at least one load is permuted. */
! node = SLP_INSTANCE_LOADS (slp_instn)[0];
! if (!node->load_permutation.exists ())
! return false;
! for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
! {
! if (!load->load_permutation.exists ())
! return false;
! FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
! if (lidx != node->load_permutation[j])
! return false;
! }
! /* Check that the loads in the first sequence are different and there
! are no gaps between them. */
! load_index = sbitmap_alloc (group_size);
! bitmap_clear (load_index);
! FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
! {
! if (bitmap_bit_p (load_index, lidx))
! {
! sbitmap_free (load_index);
! return false;
! }
! bitmap_set_bit (load_index, lidx);
! }
! for (i = 0; i < group_size; i++)
! if (!bitmap_bit_p (load_index, i))
! {
! sbitmap_free (load_index);
! return false;
! }
! sbitmap_free (load_index);
!
! /* This permutation is valid for reduction. Since the order of the
! statements in the nodes is not important unless they are memory
! accesses, we can rearrange the statements in all the nodes
! according to the order of the loads. */
! vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
! node->load_permutation);
!
! /* We are done, no actual permutations need to be generated. */
! FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
! SLP_TREE_LOAD_PERMUTATION (node).release ();
! return true;
}
/* In basic block vectorization we allow any subchain of an interleaving
--- 1373,1386 ----
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
/* Reduction (there are no data-refs in the root).
! In reduction chain the order of the loads is not important. */
if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
&& !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
{
! if (vect_attempt_slp_rearrange_stmts (slp_instn))
! return true;
! /* Fallthru to general load permutation handling. */
}
/* In basic block vectorization we allow any subchain of an interleaving
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 224271)
--- gcc/tree-vect-stmts.c (working copy)
*************** vectorizable_load (gimple stmt, gimple_s
*** 5995,6003 ****
if ((grouped_load
&& (slp || PURE_SLP_STMT (stmt_info)))
&& (group_size > nunits
! || nunits % group_size != 0
! /* We don't support load permutations. */
! || slp_perm))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unhandled strided group load\n");
--- 5995,6001 ----
if ((grouped_load
&& (slp || PURE_SLP_STMT (stmt_info)))
&& (group_size > nunits
! || nunits % group_size != 0))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unhandled strided group load\n");
*************** vectorizable_load (gimple stmt, gimple_s
*** 6294,6299 ****
--- 6292,6298 ----
alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
int nloads = nunits;
tree ltype = TREE_TYPE (vectype);
+ auto_vec<tree> dr_chain;
if (slp)
{
nloads = nunits / group_size;
*************** vectorizable_load (gimple stmt, gimple_s
*** 6303,6309 ****
ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! gcc_assert (!slp_perm);
}
for (j = 0; j < ncopies; j++)
{
--- 6302,6309 ----
ltype = vectype;
ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
! if (slp_perm)
! dr_chain.create (ncopies);
}
for (j = 0; j < ncopies; j++)
{
*************** vectorizable_load (gimple stmt, gimple_s
*** 6350,6362 ****
}
if (slp)
! SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
return true;
}
--- 6350,6369 ----
}
if (slp)
! {
! SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
! if (slp_perm)
! dr_chain.quick_push (gimple_assign_lhs (new_stmt));
! }
if (j == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
+ if (slp_perm)
+ vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
+ slp_node_instance, false);
return true;
}
reply other threads:[~2015-06-09 14:21 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.LSU.2.11.1506091615180.30088@zhemvz.fhfr.qr \
--to=rguenther@suse.de \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).