* [PATCH] Optimize SLP from scalars, workaround PR65961
@ 2015-06-02 7:39 Richard Biener
0 siblings, 0 replies; only message in thread
From: Richard Biener @ 2015-06-02 7:39 UTC (permalink / raw)
To: gcc-patches
The following patch optimizes the case where we decide to build up
all operands of an SLP node from scalars: instead, we just build up the
node's result from scalars. That's usually less expensive, and in PR65961
it works around a case that we don't handle correctly (yet).
Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
Richard.
2015-06-01 Richard Biener <rguenther@suse.de>
PR tree-optimization/65961
* tree-vect-slp.c (vect_get_and_check_slp_defs): Remove bogus
check and clarify dump message.
(vect_build_slp_tree): If all children are built up from scalars
build up the parent from scalars instead.
* tree-vect-stmts.c (vect_is_simple_use): Cleanup.
* gcc.dg/torture/pr65961.c: New testcase.
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 223974)
--- gcc/tree-vect-slp.c (working copy)
*************** again:
*** 301,313 ****
oprnd_info = (*oprnds_info)[i];
if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
! &def, &dt)
! || (!def_stmt && dt != vect_constant_def))
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "Build SLP failed: can't find def for ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
--- 301,312 ----
oprnd_info = (*oprnds_info)[i];
if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
! &def, &dt))
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "Build SLP failed: can't analyze def for ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
*************** vect_build_slp_tree (loop_vec_info loop_
*** 1092,1097 ****
--- 1091,1125 ----
vectorization_factor, matches,
npermutes, &this_tree_size, max_tree_size))
{
+ /* If we have all children of child built up from scalars then just
+ throw that away and build it up this node from scalars. */
+ if (!SLP_TREE_CHILDREN (child).is_empty ())
+ {
+ unsigned int j;
+ slp_tree grandchild;
+
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
+ if (grandchild != NULL)
+ break;
+ if (!grandchild)
+ {
+ /* Roll back. */
+ *max_nunits = old_max_nunits;
+ loads->truncate (old_nloads);
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
+ vect_free_slp_tree (grandchild);
+ SLP_TREE_CHILDREN (child).truncate (0);
+
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Building parent vector operands from "
+ "scalars instead\n");
+ oprnd_info->def_stmts = vNULL;
+ vect_free_slp_tree (child);
+ SLP_TREE_CHILDREN (*node).quick_push (NULL);
+ continue;
+ }
+ }
+
oprnd_info->def_stmts = vNULL;
SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 223974)
--- gcc/tree-vect-stmts.c (working copy)
*************** vect_is_simple_use (tree operand, gimple
*** 7878,7892 ****
bb_vec_info bb_vinfo, gimple *def_stmt,
tree *def, enum vect_def_type *dt)
{
- basic_block bb;
- stmt_vec_info stmt_vinfo;
- struct loop *loop = NULL;
-
- if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
-
*def_stmt = NULL;
*def = NULL_TREE;
if (dump_enabled_p ())
{
--- 7878,7886 ----
bb_vec_info bb_vinfo, gimple *def_stmt,
tree *def, enum vect_def_type *dt)
{
*def_stmt = NULL;
*def = NULL_TREE;
+ *dt = vect_unknown_def_type;
if (dump_enabled_p ())
{
*************** vect_is_simple_use (tree operand, gimple
*** 7909,7921 ****
return true;
}
- if (TREE_CODE (operand) == PAREN_EXPR)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
- operand = TREE_OPERAND (operand, 0);
- }
-
if (TREE_CODE (operand) != SSA_NAME)
{
if (dump_enabled_p ())
--- 7903,7908 ----
*************** vect_is_simple_use (tree operand, gimple
*** 7924,7963 ****
return false;
}
! *def_stmt = SSA_NAME_DEF_STMT (operand);
! if (*def_stmt == NULL)
{
! if (dump_enabled_p ())
! dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
! "no def_stmt.\n");
! return false;
}
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
}
! /* Empty stmt is expected only in case of a function argument.
! (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
! if (gimple_nop_p (*def_stmt))
! {
! *def = operand;
! *dt = vect_external_def;
! return true;
! }
!
! bb = gimple_bb (*def_stmt);
!
! if ((loop && !flow_bb_inside_loop_p (loop, bb))
! || (!loop && bb != BB_VINFO_BB (bb_vinfo))
! || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
*dt = vect_external_def;
else
{
! stmt_vinfo = vinfo_for_stmt (*def_stmt);
! if (!loop && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
*dt = vect_external_def;
else
*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
--- 7911,7940 ----
return false;
}
! if (SSA_NAME_IS_DEFAULT_DEF (operand))
{
! *def = operand;
! *dt = vect_external_def;
! return true;
}
+ *def_stmt = SSA_NAME_DEF_STMT (operand);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
}
! basic_block bb = gimple_bb (*def_stmt);
! if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
! || (bb_vinfo
! && (bb != BB_VINFO_BB (bb_vinfo)
! || gimple_code (*def_stmt) == GIMPLE_PHI)))
*dt = vect_external_def;
else
{
! stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
! if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
*dt = vect_external_def;
else
*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
Index: gcc/testsuite/gcc.dg/torture/pr65961.c
===================================================================
*** gcc/testsuite/gcc.dg/torture/pr65961.c (revision 0)
--- gcc/testsuite/gcc.dg/torture/pr65961.c (working copy)
***************
*** 0 ****
--- 1,20 ----
+ /* { dg-do compile } */
+
+ int *a;
+ void
+ foo ()
+ {
+ do
+ {
+ a[16] = (a[1] ^ a[0]) << 1 | a[1];
+ a[17] = (a[0] ^ a[1]) << 1 | a[0];
+ a[18] = (a[0] ^ a[1]) << 1 | a[0];
+ a[19] = (a[0] ^ a[1]) << 1 | a[0];
+ a[20] = (a[0] ^ a[1]) << 1 | a[0];
+ a[21] = (a[0] ^ a[1]) << 1 | a[0];
+ a[22] = (a[0] ^ a[1]) << 1 | a[0];
+ a[23] = (a[20] ^ a[1]) << 1 | a[9];
+ a += 8;
+ }
+ while (1);
+ }
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2015-06-02 7:38 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-06-02 7:39 [PATCH] Optimize SLP from scalars, workaround PR65961 Richard Biener
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).