* [PATCH] Fix PR68852
@ 2015-12-14 15:14 Richard Biener
2015-12-14 15:34 ` Richard Biener
2015-12-17 16:31 ` Kyrill Tkachov
0 siblings, 2 replies; 5+ messages in thread
From: Richard Biener @ 2015-12-14 15:14 UTC (permalink / raw)
To: gcc-patches
The following fixes PR68852 - so I finally needed to sit down and
fix the "build-from-scalars" hack in the SLP vectorizer by pretending
we'd have a sane vectorizer IL. Basically I now mark the SLP node
with a proper vect_def_type but I have to push that down to the
stmt-info level whenever something would look at it.
It's a bit ugly but not too much yet ;)
Anyway, the proper fix is to have a sane data structure, nothing for
GCC 6 though.
Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
Verified SPEC CPU 2006 is happy with the patch.
Richard.
2015-12-14 Richard Biener <rguenther@suse.de>
PR tree-optimization/68852
* tree-vectorizer.h (struct _slp_tree): Add def_type member.
(SLP_TREE_DEF_TYPE): New accessor.
* tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
hack.
* tree-vect-slp.c (vect_create_new_slp_node): Initialize
SLP_TREE_DEF_TYPE.
(vect_build_slp_tree): When a node is to be built up from scalars
do not push a NULL as child but instead set its def_type to
vect_external_def.
(vect_analyze_slp_cost_1): Check for child def-type instead
of NULL.
(vect_detect_hybrid_slp_stmts): Likewise.
(vect_bb_slp_scalar_cost): Likewise.
(vect_get_slp_defs): Likewise.
(vect_slp_analyze_node_operations): Likewise. Before
processing node push the children def-types to the underlying
stmts vinfo and restore it afterwards.
(vect_schedule_slp_instance): Likewise.
(vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
as not vectorizable.
* g++.dg/torture/pr68852.C: New testcase.
Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h (revision 231552)
--- gcc/tree-vectorizer.h (working copy)
*************** struct _slp_tree {
*** 107,112 ****
--- 107,114 ----
unsigned int vec_stmts_size;
/* Whether the scalar computations use two different operators. */
bool two_operators;
+ /* The DEF type of this node. */
+ enum vect_def_type def_type;
};
*************** typedef struct _slp_instance {
*** 139,144 ****
--- 141,147 ----
#define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
#define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
#define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
+ #define SLP_TREE_DEF_TYPE(S) (S)->def_type
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c (revision 231552)
--- gcc/tree-vect-stmts.c (working copy)
*************** vect_is_simple_use (tree operand, vec_in
*** 8649,8658 ****
else
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
! if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
! *dt = vect_external_def;
! else
! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
}
if (dump_enabled_p ())
--- 8652,8658 ----
else
{
stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
}
if (dump_enabled_p ())
Index: gcc/testsuite/g++.dg/torture/pr68852.C
===================================================================
--- gcc/testsuite/g++.dg/torture/pr68852.C (revision 0)
+++ gcc/testsuite/g++.dg/torture/pr68852.C (working copy)
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+
+struct A {
+ double x, y, z, w;
+ A() {}
+ A(double, double p2, double p3, double) : y(p2), z(p3) {}
+ void m_fn1();
+};
+
+struct B {
+ double x, y;
+};
+struct D : A {
+ D() {}
+ D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
+};
+
+class C {
+public:
+ float _11, _12, _13, _14;
+ float _21, _22, _23, _24;
+ float _31, _32, _33, _34;
+ float _41, _42, _43, _44;
+ D m_fn2(B p1) {
+ double z(p1.x + _43);
+ return *this * D(p1.x, p1.y, z, 1);
+ }
+ int ProjectRectBounds_next;
+ B __trans_tmp_3;
+ int m_fn3(int) {
+ B a, b;
+ D c[1];
+ b = __trans_tmp_3;
+ c[2] = m_fn2(b);
+ c[3] = m_fn2(a);
+ c[ProjectRectBounds_next].m_fn1();
+ }
+ D operator*(D p1) {
+ D d;
+ d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
+ d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
+ d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
+ d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
+ return d;
+ }
+};
+
+void fn1() {
+ C e;
+ int f = e.m_fn3(f);
+}
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c (revision 231610)
--- gcc/tree-vect-slp.c (working copy)
*************** vect_free_slp_tree (slp_tree node)
*** 51,59 ****
int i;
slp_tree child;
- if (!node)
- return;
-
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_free_slp_tree (child);
--- 51,56 ----
*************** vect_create_new_slp_node (vec<gimple *>
*** 103,108 ****
--- 100,106 ----
SLP_TREE_CHILDREN (node).create (nops);
SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
SLP_TREE_TWO_OPERATORS (node) = false;
+ SLP_TREE_DEF_TYPE (node) = vect_internal_def;
return node;
}
*************** vect_build_slp_tree (vec_info *vinfo,
*** 938,944 ****
slp_tree grandchild;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! if (grandchild != NULL)
break;
if (!grandchild)
{
--- 936,942 ----
slp_tree grandchild;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
break;
if (!grandchild)
{
*************** vect_build_slp_tree (vec_info *vinfo,
*** 946,960 ****
*max_nunits = old_max_nunits;
loads->truncate (old_nloads);
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! vect_free_slp_tree (grandchild);
SLP_TREE_CHILDREN (child).truncate (0);
dump_printf_loc (MSG_NOTE, vect_location,
"Building parent vector operands from "
"scalars instead\n");
oprnd_info->def_stmts = vNULL;
! vect_free_slp_tree (child);
! SLP_TREE_CHILDREN (*node).quick_push (NULL);
continue;
}
}
--- 944,958 ----
*max_nunits = old_max_nunits;
loads->truncate (old_nloads);
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! vect_free_slp_tree (grandchild);
SLP_TREE_CHILDREN (child).truncate (0);
dump_printf_loc (MSG_NOTE, vect_location,
"Building parent vector operands from "
"scalars instead\n");
oprnd_info->def_stmts = vNULL;
! SLP_TREE_DEF_TYPE (child) = vect_external_def;
! SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
}
}
*************** vect_build_slp_tree (vec_info *vinfo,
*** 992,999 ****
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands from scalars\n");
oprnd_info->def_stmts = vNULL;
! vect_free_slp_tree (child);
! SLP_TREE_CHILDREN (*node).quick_push (NULL);
continue;
}
--- 990,997 ----
dump_printf_loc (MSG_NOTE, vect_location,
"Building vector operands from scalars\n");
oprnd_info->def_stmts = vNULL;
! SLP_TREE_DEF_TYPE (child) = vect_external_def;
! SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
}
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1044,1049 ****
--- 1042,1061 ----
tem, npermutes, &this_tree_size,
max_tree_size))
{
+ /* ... so if successful we can apply the operand swapping
+ to the GIMPLE IL. This is necessary because for example
+ vect_get_slp_defs uses operand indexes and thus expects
+ canonical operand order. This is also necessary even
+ if we end up building the operand from scalars as
+ we'll continue to process swapped operand two. */
+ for (j = 0; j < group_size; ++j)
+ if (!matches[j])
+ {
+ gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
+ swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+ gimple_assign_rhs2_ptr (stmt));
+ }
+
/* If we have all children of child built up from scalars then
just throw that away and build it up this node from scalars. */
if (!SLP_TREE_CHILDREN (child).is_empty ())
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1052,1058 ****
slp_tree grandchild;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! if (grandchild != NULL)
break;
if (!grandchild)
{
--- 1064,1070 ----
slp_tree grandchild;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
break;
if (!grandchild)
{
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1067,1089 ****
"Building parent vector operands from "
"scalars instead\n");
oprnd_info->def_stmts = vNULL;
! vect_free_slp_tree (child);
! SLP_TREE_CHILDREN (*node).quick_push (NULL);
continue;
}
}
- /* ... so if successful we can apply the operand swapping
- to the GIMPLE IL. This is necessary because for example
- vect_get_slp_defs uses operand indexes and thus expects
- canonical operand order. */
- for (j = 0; j < group_size; ++j)
- if (!matches[j])
- {
- gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
- swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
- gimple_assign_rhs2_ptr (stmt));
- }
oprnd_info->def_stmts = vNULL;
SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
--- 1079,1090 ----
"Building parent vector operands from "
"scalars instead\n");
oprnd_info->def_stmts = vNULL;
! SLP_TREE_DEF_TYPE (child) = vect_external_def;
! SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
}
}
oprnd_info->def_stmts = vNULL;
SLP_TREE_CHILDREN (*node).quick_push (child);
continue;
*************** vect_print_slp_tree (int dump_kind, loca
*** 1114,1123 ****
gimple *stmt;
slp_tree child;
! if (!node)
! return;
!
! dump_printf_loc (dump_kind, loc, "node\n");
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
--- 1115,1123 ----
gimple *stmt;
slp_tree child;
! dump_printf_loc (dump_kind, loc, "node%s\n",
! SLP_TREE_DEF_TYPE (node) != vect_internal_def
! ? " (external)" : "");
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
*************** vect_mark_slp_stmts (slp_tree node, enum
*** 1140,1146 ****
gimple *stmt;
slp_tree child;
! if (!node)
return;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
--- 1140,1146 ----
gimple *stmt;
slp_tree child;
! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
*************** vect_mark_slp_stmts_relevant (slp_tree n
*** 1162,1168 ****
stmt_vec_info stmt_info;
slp_tree child;
! if (!node)
return;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
--- 1162,1168 ----
stmt_vec_info stmt_info;
slp_tree child;
! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1400,1406 ****
stmt_vector_for_cost *body_cost_vec,
unsigned ncopies_for_cost)
{
! unsigned i;
slp_tree child;
gimple *stmt, *s;
stmt_vec_info stmt_info;
--- 1400,1406 ----
stmt_vector_for_cost *body_cost_vec,
unsigned ncopies_for_cost)
{
! unsigned i, j;
slp_tree child;
gimple *stmt, *s;
stmt_vec_info stmt_info;
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1409,1415 ****
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
! if (child)
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
body_cost_vec, ncopies_for_cost);
--- 1409,1415 ----
/* Recurse down the SLP tree. */
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
body_cost_vec, ncopies_for_cost);
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1464,1472 ****
--- 1464,1479 ----
}
}
+ /* Push SLP node def-type to stmts. */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
+
/* Scan operands and account for prologue cost of constants/externals.
??? This over-estimates cost for multiple uses and should be
re-engineered. */
+ stmt = SLP_TREE_SCALAR_STMTS (node)[0];
lhs = gimple_get_lhs (stmt);
for (i = 0; i < gimple_num_ops (stmt); ++i)
{
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1489,1494 ****
--- 1496,1507 ----
stmt_info, 0, vect_prologue);
}
}
+
+ /* Restore stmt def-types. */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}
/* Compute the cost for the SLP instance INSTANCE. */
*************** vect_analyze_slp_instance (vec_info *vin
*** 1795,1800 ****
--- 1808,1840 ----
}
}
+ /* If the loads and stores can be handled with load/store-lane
+ instructions do not generate this SLP instance. */
+ if (is_a <loop_vec_info> (vinfo)
+ && loads_permuted
+ && dr && vect_store_lanes_supported (vectype, group_size))
+ {
+ slp_tree load_node;
+ FOR_EACH_VEC_ELT (loads, i, load_node)
+ {
+ gimple *first_stmt = GROUP_FIRST_ELEMENT
+ (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
+ if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
+ GROUP_SIZE (stmt_vinfo)))
+ break;
+ }
+ if (i == loads.length ())
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Built SLP cancelled: can use "
+ "load/store-lanes\n");
+ vect_free_slp_instance (new_instance);
+ return false;
+ }
+ }
+
vinfo->slp_instances.safe_push (new_instance);
if (dump_enabled_p ())
*************** vect_detect_hybrid_slp_stmts (slp_tree n
*** 2004,2010 ****
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
! if (child)
vect_detect_hybrid_slp_stmts (child, i, stype);
}
--- 2044,2050 ----
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
! if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
vect_detect_hybrid_slp_stmts (child, i, stype);
}
*************** static bool
*** 2185,2201 ****
vect_slp_analyze_node_operations (slp_tree node)
{
bool dummy;
! int i;
gimple *stmt;
slp_tree child;
! if (!node)
return true;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (!vect_slp_analyze_node_operations (child))
return false;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
--- 2225,2248 ----
vect_slp_analyze_node_operations (slp_tree node)
{
bool dummy;
! int i, j;
gimple *stmt;
slp_tree child;
! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return true;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (!vect_slp_analyze_node_operations (child))
return false;
+ /* Push SLP node def-type to stmts. */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
+
+ bool res = true;
FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
*************** vect_slp_analyze_node_operations (slp_tr
*** 2203,2212 ****
gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
if (!vect_analyze_stmt (stmt, &dummy, node))
! return false;
}
! return true;
}
--- 2250,2268 ----
gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
if (!vect_analyze_stmt (stmt, &dummy, node))
! {
! res = false;
! break;
! }
}
! /* Restore stmt def-types. */
! FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
! if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
! STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
!
! return res;
}
*************** vect_bb_slp_scalar_cost (basic_block bb,
*** 2286,2292 ****
if (!is_gimple_debug (use_stmt)
&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
use_stmt)
! || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
{
(*life)[i] = true;
BREAK_FROM_IMM_USE_STMT (use_iter);
--- 2342,2348 ----
if (!is_gimple_debug (use_stmt)
&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
use_stmt)
! || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
{
(*life)[i] = true;
BREAK_FROM_IMM_USE_STMT (use_iter);
*************** vect_bb_slp_scalar_cost (basic_block bb,
*** 2310,2316 ****
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
! if (child)
scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
return scalar_cost;
--- 2366,2372 ----
}
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
return scalar_cost;
*************** vect_slp_analyze_bb_1 (gimple_stmt_itera
*** 2499,2513 ****
return NULL;
}
- /* Mark all the statements that we do not want to vectorize. */
- for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
- gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
- {
- stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
- if (STMT_SLP_TYPE (vinfo) != pure_slp)
- STMT_VINFO_VECTORIZABLE (vinfo) = false;
- }
-
if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
{
--- 2555,2560 ----
*************** vect_get_slp_defs (vec<tree> ops, slp_tr
*** 3085,3091 ****
child = SLP_TREE_CHILDREN (slp_node)[child_index];
/* We have to check both pattern and original def, if available. */
! if (child)
{
gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
gimple *related
--- 3132,3138 ----
child = SLP_TREE_CHILDREN (slp_node)[child_index];
/* We have to check both pattern and original def, if available. */
! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
{
gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
gimple *related
*************** vect_schedule_slp_instance (slp_tree nod
*** 3374,3388 ****
stmt_vec_info stmt_info;
unsigned int vec_stmts_size, nunits, group_size;
tree vectype;
! int i;
slp_tree child;
! if (!node)
return false;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_schedule_slp_instance (child, instance, vectorization_factor);
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
stmt_info = vinfo_for_stmt (stmt);
--- 3421,3441 ----
stmt_vec_info stmt_info;
unsigned int vec_stmts_size, nunits, group_size;
tree vectype;
! int i, j;
slp_tree child;
! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return false;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
vect_schedule_slp_instance (child, instance, vectorization_factor);
+ /* Push SLP node def-type to stmts. */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
+
stmt = SLP_TREE_SCALAR_STMTS (node)[0];
stmt_info = vinfo_for_stmt (stmt);
*************** vect_schedule_slp_instance (slp_tree nod
*** 3501,3506 ****
--- 3554,3566 ----
}
}
is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
+
+ /* Restore stmt def-types. */
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
+
return is_store;
}
*************** vect_remove_slp_scalar_calls (slp_tree n
*** 3519,3525 ****
tree lhs;
stmt_vec_info stmt_info;
! if (!node)
return;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
--- 3579,3585 ----
tree lhs;
stmt_vec_info stmt_info;
! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
return;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix PR68852
2015-12-14 15:14 [PATCH] Fix PR68852 Richard Biener
@ 2015-12-14 15:34 ` Richard Biener
2015-12-17 16:31 ` Kyrill Tkachov
1 sibling, 0 replies; 5+ messages in thread
From: Richard Biener @ 2015-12-14 15:34 UTC (permalink / raw)
To: gcc-patches
On Mon, 14 Dec 2015, Richard Biener wrote:
>
> The following fixes PR68852 - so I finally needed to sit down and
> fix the "build-from-scalars" hack in the SLP vectorizer by pretending
> we'd have a sane vectorizer IL. Basically I now mark the SLP node
> with a proper vect_def_type but I have to push that down to the
> stmt-info level whenever something would look at it.
>
> It's a bit ugly but not too much yet ;)
>
> Anyway, the proper fix is to have a sane data structure, nothing for
> GCC 6 though.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Verified SPEC CPU 2006 is happy with the patch.
Ick. I reverted the accidentally applied fix for PR68707 that went
with this patch. The other unrelated hunk was already applied
as the fix for PR68775.
Richard.
> Richard.
>
> 2015-12-14 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/68852
> * tree-vectorizer.h (struct _slp_tree): Add def_type member.
> (SLP_TREE_DEF_TYPE): New accessor.
> * tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
> hack.
> * tree-vect-slp.c (vect_create_new_slp_node): Initialize
> SLP_TREE_DEF_TYPE.
> (vect_build_slp_tree): When a node is to be built up from scalars
> do not push a NULL as child but instead set its def_type to
> vect_external_def.
> (vect_analyze_slp_cost_1): Check for child def-type instead
> of NULL.
> (vect_detect_hybrid_slp_stmts): Likewise.
> (vect_bb_slp_scalar_cost): Likewise.
> (vect_get_slp_defs): Likewise.
> (vect_slp_analyze_node_operations): Likewise. Before
> processing node push the children def-types to the underlying
> stmts vinfo and restore it afterwards.
> (vect_schedule_slp_instance): Likewise.
> (vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
> as not vectorizable.
>
> * g++.dg/torture/pr68852.C: New testcase.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> *** gcc/tree-vectorizer.h (revision 231552)
> --- gcc/tree-vectorizer.h (working copy)
> *************** struct _slp_tree {
> *** 107,112 ****
> --- 107,114 ----
> unsigned int vec_stmts_size;
> /* Whether the scalar computations use two different operators. */
> bool two_operators;
> + /* The DEF type of this node. */
> + enum vect_def_type def_type;
> };
>
>
> *************** typedef struct _slp_instance {
> *** 139,144 ****
> --- 141,147 ----
> #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
> #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
> #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
> + #define SLP_TREE_DEF_TYPE(S) (S)->def_type
>
>
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c (revision 231552)
> --- gcc/tree-vect-stmts.c (working copy)
> *************** vect_is_simple_use (tree operand, vec_in
> *** 8649,8658 ****
> else
> {
> stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> ! if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
> ! *dt = vect_external_def;
> ! else
> ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> }
>
> if (dump_enabled_p ())
> --- 8652,8658 ----
> else
> {
> stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> }
>
> if (dump_enabled_p ())
> Index: gcc/testsuite/g++.dg/torture/pr68852.C
> ===================================================================
> --- gcc/testsuite/g++.dg/torture/pr68852.C (revision 0)
> +++ gcc/testsuite/g++.dg/torture/pr68852.C (working copy)
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +
> +struct A {
> + double x, y, z, w;
> + A() {}
> + A(double, double p2, double p3, double) : y(p2), z(p3) {}
> + void m_fn1();
> +};
> +
> +struct B {
> + double x, y;
> +};
> +struct D : A {
> + D() {}
> + D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
> +};
> +
> +class C {
> +public:
> + float _11, _12, _13, _14;
> + float _21, _22, _23, _24;
> + float _31, _32, _33, _34;
> + float _41, _42, _43, _44;
> + D m_fn2(B p1) {
> + double z(p1.x + _43);
> + return *this * D(p1.x, p1.y, z, 1);
> + }
> + int ProjectRectBounds_next;
> + B __trans_tmp_3;
> + int m_fn3(int) {
> + B a, b;
> + D c[1];
> + b = __trans_tmp_3;
> + c[2] = m_fn2(b);
> + c[3] = m_fn2(a);
> + c[ProjectRectBounds_next].m_fn1();
> + }
> + D operator*(D p1) {
> + D d;
> + d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
> + d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
> + d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
> + d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
> + return d;
> + }
> +};
> +
> +void fn1() {
> + C e;
> + int f = e.m_fn3(f);
> +}
> Index: gcc/tree-vect-slp.c
> ===================================================================
> *** gcc/tree-vect-slp.c (revision 231610)
> --- gcc/tree-vect-slp.c (working copy)
> *************** vect_free_slp_tree (slp_tree node)
> *** 51,59 ****
> int i;
> slp_tree child;
>
> - if (!node)
> - return;
> -
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> vect_free_slp_tree (child);
>
> --- 51,56 ----
> *************** vect_create_new_slp_node (vec<gimple *>
> *** 103,108 ****
> --- 100,106 ----
> SLP_TREE_CHILDREN (node).create (nops);
> SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
> SLP_TREE_TWO_OPERATORS (node) = false;
> + SLP_TREE_DEF_TYPE (node) = vect_internal_def;
>
> return node;
> }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 938,944 ****
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (grandchild != NULL)
> break;
> if (!grandchild)
> {
> --- 936,942 ----
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> break;
> if (!grandchild)
> {
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 946,960 ****
> *max_nunits = old_max_nunits;
> loads->truncate (old_nloads);
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! vect_free_slp_tree (grandchild);
> SLP_TREE_CHILDREN (child).truncate (0);
>
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! vect_free_slp_tree (child);
> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> continue;
> }
> }
> --- 944,958 ----
> *max_nunits = old_max_nunits;
> loads->truncate (old_nloads);
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! vect_free_slp_tree (grandchild);
> SLP_TREE_CHILDREN (child).truncate (0);
>
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> }
> }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 992,999 ****
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building vector operands from scalars\n");
> oprnd_info->def_stmts = vNULL;
> ! vect_free_slp_tree (child);
> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> continue;
> }
>
> --- 990,997 ----
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building vector operands from scalars\n");
> oprnd_info->def_stmts = vNULL;
> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> }
>
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1044,1049 ****
> --- 1042,1061 ----
> tem, npermutes, &this_tree_size,
> max_tree_size))
> {
> + /* ... so if successful we can apply the operand swapping
> + to the GIMPLE IL. This is necessary because for example
> + vect_get_slp_defs uses operand indexes and thus expects
> + canonical operand order. This is also necessary even
> + if we end up building the operand from scalars as
> + we'll continue to process swapped operand two. */
> + for (j = 0; j < group_size; ++j)
> + if (!matches[j])
> + {
> + gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> + swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> + gimple_assign_rhs2_ptr (stmt));
> + }
> +
> /* If we have all children of child built up from scalars then
> just throw that away and build it up this node from scalars. */
> if (!SLP_TREE_CHILDREN (child).is_empty ())
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1052,1058 ****
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (grandchild != NULL)
> break;
> if (!grandchild)
> {
> --- 1064,1070 ----
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> break;
> if (!grandchild)
> {
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1067,1089 ****
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! vect_free_slp_tree (child);
> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> continue;
> }
> }
>
> - /* ... so if successful we can apply the operand swapping
> - to the GIMPLE IL. This is necessary because for example
> - vect_get_slp_defs uses operand indexes and thus expects
> - canonical operand order. */
> - for (j = 0; j < group_size; ++j)
> - if (!matches[j])
> - {
> - gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> - swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> - gimple_assign_rhs2_ptr (stmt));
> - }
> oprnd_info->def_stmts = vNULL;
> SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> --- 1079,1090 ----
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> }
> }
>
> oprnd_info->def_stmts = vNULL;
> SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> *************** vect_print_slp_tree (int dump_kind, loca
> *** 1114,1123 ****
> gimple *stmt;
> slp_tree child;
>
> ! if (!node)
> ! return;
> !
> ! dump_printf_loc (dump_kind, loc, "node\n");
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> --- 1115,1123 ----
> gimple *stmt;
> slp_tree child;
>
> ! dump_printf_loc (dump_kind, loc, "node%s\n",
> ! SLP_TREE_DEF_TYPE (node) != vect_internal_def
> ! ? " (external)" : "");
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> *************** vect_mark_slp_stmts (slp_tree node, enum
> *** 1140,1146 ****
> gimple *stmt;
> slp_tree child;
>
> ! if (!node)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1140,1146 ----
> gimple *stmt;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_mark_slp_stmts_relevant (slp_tree n
> *** 1162,1168 ****
> stmt_vec_info stmt_info;
> slp_tree child;
>
> ! if (!node)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1162,1168 ----
> stmt_vec_info stmt_info;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1400,1406 ****
> stmt_vector_for_cost *body_cost_vec,
> unsigned ncopies_for_cost)
> {
> ! unsigned i;
> slp_tree child;
> gimple *stmt, *s;
> stmt_vec_info stmt_info;
> --- 1400,1406 ----
> stmt_vector_for_cost *body_cost_vec,
> unsigned ncopies_for_cost)
> {
> ! unsigned i, j;
> slp_tree child;
> gimple *stmt, *s;
> stmt_vec_info stmt_info;
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1409,1415 ****
>
> /* Recurse down the SLP tree. */
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (child)
> vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> body_cost_vec, ncopies_for_cost);
>
> --- 1409,1415 ----
>
> /* Recurse down the SLP tree. */
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> body_cost_vec, ncopies_for_cost);
>
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1464,1472 ****
> --- 1464,1479 ----
> }
> }
>
> + /* Push SLP node def-type to stmts. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> /* Scan operands and account for prologue cost of constants/externals.
> ??? This over-estimates cost for multiple uses and should be
> re-engineered. */
> + stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> lhs = gimple_get_lhs (stmt);
> for (i = 0; i < gimple_num_ops (stmt); ++i)
> {
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1489,1494 ****
> --- 1496,1507 ----
> stmt_info, 0, vect_prologue);
> }
> }
> +
> + /* Restore stmt def-types. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> }
>
> /* Compute the cost for the SLP instance INSTANCE. */
> *************** vect_analyze_slp_instance (vec_info *vin
> *** 1795,1800 ****
> --- 1808,1840 ----
> }
> }
>
> + /* If the loads and stores can be handled with load/store-lane
> + instructions do not generate this SLP instance. */
> + if (is_a <loop_vec_info> (vinfo)
> + && loads_permuted
> + && dr && vect_store_lanes_supported (vectype, group_size))
> + {
> + slp_tree load_node;
> + FOR_EACH_VEC_ELT (loads, i, load_node)
> + {
> + gimple *first_stmt = GROUP_FIRST_ELEMENT
> + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> + if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
> + GROUP_SIZE (stmt_vinfo)))
> + break;
> + }
> + if (i == loads.length ())
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "Built SLP cancelled: can use "
> + "load/store-lanes\n");
> + vect_free_slp_instance (new_instance);
> + return false;
> + }
> + }
> +
> vinfo->slp_instances.safe_push (new_instance);
>
> if (dump_enabled_p ())
> *************** vect_detect_hybrid_slp_stmts (slp_tree n
> *** 2004,2010 ****
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> ! if (child)
> vect_detect_hybrid_slp_stmts (child, i, stype);
> }
>
> --- 2044,2050 ----
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> ! if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
> vect_detect_hybrid_slp_stmts (child, i, stype);
> }
>
> *************** static bool
> *** 2185,2201 ****
> vect_slp_analyze_node_operations (slp_tree node)
> {
> bool dummy;
> ! int i;
> gimple *stmt;
> slp_tree child;
>
> ! if (!node)
> return true;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> if (!vect_slp_analyze_node_operations (child))
> return false;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> --- 2225,2248 ----
> vect_slp_analyze_node_operations (slp_tree node)
> {
> bool dummy;
> ! int i, j;
> gimple *stmt;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return true;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> if (!vect_slp_analyze_node_operations (child))
> return false;
>
> + /* Push SLP node def-type to stmts. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> + bool res = true;
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> *************** vect_slp_analyze_node_operations (slp_tr
> *** 2203,2212 ****
> gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>
> if (!vect_analyze_stmt (stmt, &dummy, node))
> ! return false;
> }
>
> ! return true;
> }
>
>
> --- 2250,2268 ----
> gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>
> if (!vect_analyze_stmt (stmt, &dummy, node))
> ! {
> ! res = false;
> ! break;
> ! }
> }
>
> ! /* Restore stmt def-types. */
> ! FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> ! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> ! STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> !
> ! return res;
> }
>
>
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2286,2292 ****
> if (!is_gimple_debug (use_stmt)
> && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> use_stmt)
> ! || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
> {
> (*life)[i] = true;
> BREAK_FROM_IMM_USE_STMT (use_iter);
> --- 2342,2348 ----
> if (!is_gimple_debug (use_stmt)
> && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> use_stmt)
> ! || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
> {
> (*life)[i] = true;
> BREAK_FROM_IMM_USE_STMT (use_iter);
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2310,2316 ****
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (child)
> scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>
> return scalar_cost;
> --- 2366,2372 ----
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>
> return scalar_cost;
> *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
> *** 2499,2513 ****
> return NULL;
> }
>
> - /* Mark all the statements that we do not want to vectorize. */
> - for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
> - gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
> - {
> - stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
> - if (STMT_SLP_TYPE (vinfo) != pure_slp)
> - STMT_VINFO_VECTORIZABLE (vinfo) = false;
> - }
> -
> if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
> BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
> {
> --- 2555,2560 ----
> *************** vect_get_slp_defs (vec<tree> ops, slp_tr
> *** 3085,3091 ****
> child = SLP_TREE_CHILDREN (slp_node)[child_index];
>
> /* We have to check both pattern and original def, if available. */
> ! if (child)
> {
> gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> gimple *related
> --- 3132,3138 ----
> child = SLP_TREE_CHILDREN (slp_node)[child_index];
>
> /* We have to check both pattern and original def, if available. */
> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> {
> gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> gimple *related
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3374,3388 ****
> stmt_vec_info stmt_info;
> unsigned int vec_stmts_size, nunits, group_size;
> tree vectype;
> ! int i;
> slp_tree child;
>
> ! if (!node)
> return false;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> vect_schedule_slp_instance (child, instance, vectorization_factor);
>
> stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> stmt_info = vinfo_for_stmt (stmt);
>
> --- 3421,3441 ----
> stmt_vec_info stmt_info;
> unsigned int vec_stmts_size, nunits, group_size;
> tree vectype;
> ! int i, j;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return false;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> vect_schedule_slp_instance (child, instance, vectorization_factor);
>
> + /* Push SLP node def-type to stmts. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> stmt_info = vinfo_for_stmt (stmt);
>
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3501,3506 ****
> --- 3554,3566 ----
> }
> }
> is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
> +
> + /* Restore stmt def-types. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> +
> return is_store;
> }
>
> *************** vect_remove_slp_scalar_calls (slp_tree n
> *** 3519,3525 ****
> tree lhs;
> stmt_vec_info stmt_info;
>
> ! if (!node)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> --- 3579,3585 ----
> tree lhs;
> stmt_vec_info stmt_info;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix PR68852
2015-12-14 15:14 [PATCH] Fix PR68852 Richard Biener
2015-12-14 15:34 ` Richard Biener
@ 2015-12-17 16:31 ` Kyrill Tkachov
2015-12-18 8:57 ` Richard Biener
1 sibling, 1 reply; 5+ messages in thread
From: Kyrill Tkachov @ 2015-12-17 16:31 UTC (permalink / raw)
To: Richard Biener, gcc-patches
On 14/12/15 15:14, Richard Biener wrote:
> The following fixes PR68852 - so I finally needed to sit down and
> fix the "build-from-scalars" hack in the SLP vectorizer by pretending
> we'd have a sane vectorizer IL. Basically I now mark the SLP node
> with a proper vect_def_type but I have to push that down to the
> stmt-info level whenever sth would look at it.
>
> It's a bit ugly but not too much yet ;)
>
> Anyway, the proper fix is to have a sane data structure, nothing for
> GCC 6 though.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Verified SPEC CPU 2006 is happy with the patch.
Unfortunately it's not very happy on aarch64 ;)
416.gamess (trans.fppized.f in particular) ICEs after this patch with:
trans.fppized.f:2086:0:
SUBROUTINE TRFMCX(NPRINT,ICORBS,IORBS,IORB,DOFOCK,DOEXCH,
internal compiler error: in vect_analyze_stmt, at tree-vect-stmts.c:8013
0xd34d1b vect_analyze_stmt(gimple*, bool*, _slp_tree*)
$SRC/tree-vect-stmts.c:8013
0xd4b64a vect_slp_analyze_node_operations
$SRC/tree-vect-slp.c:2237
0xd4b533 vect_slp_analyze_node_operations
$SRC/tree-vect-slp.c:2221
0xd4b533 vect_slp_analyze_node_operations
$SRC/tree-vect-slp.c:2221
0xd4b533 vect_slp_analyze_node_operations
$SRC/tree-vect-slp.c:2221
0xd4b533 vect_slp_analyze_node_operations
$SRC/tree-vect-slp.c:2221
0xd4f7dc vect_slp_analyze_operations(vec<_slp_instance*, va_heap, vl_ptr>, void*)
$SRC/tree-vect-slp.c:2269
0xd546a0 vect_slp_analyze_bb_1
$SRC/tree-vect-slp.c:2543
0xd546a0 vect_slp_bb(basic_block_def*)
$SRC/tree-vect-slp.c:2630
0xd56985 execute
$SRC/tree-vectorizer.c:759
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.
when using the flags
-mcpu=cortex-a53+crypto -save-temps -Ofast -fomit-frame-pointer -fno-aggressive-loop-optimizations
I'll open a bug report to keep track of it.
Thanks,
Kyrill
> Richard.
>
> 2015-12-14 Richard Biener <rguenther@suse.de>
>
> PR tree-optimization/68852
> * tree-vectorizer.h (struct _slp_tree): Add def_type member.
> (SLP_TREE_DEF_TYPE): New accessor.
> * tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
> hack.
> * tree-vect-slp.c (vect_create_new_slp_node): Initialize
> SLP_TREE_DEF_TYPE.
> (vect_build_slp_tree): When a node is to be built up from scalars
> do not push a NULL as child but instead set its def_type to
> vect_external_def.
> (vect_analyze_slp_cost_1): Check for child def-type instead
> of NULL.
> (vect_detect_hybrid_slp_stmts): Likewise.
> (vect_bb_slp_scalar_cost): Likewise.
> (vect_get_slp_defs): Likewise.
> (vect_slp_analyze_node_operations): Likewise. Before
> processing node push the children def-types to the underlying
> stmts vinfo and restore it afterwards.
> (vect_schedule_slp_instance): Likewise.
> (vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
> as not vectorizable.
>
> * g++.dg/torture/pr68852.C: New testcase.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> *** gcc/tree-vectorizer.h (revision 231552)
> --- gcc/tree-vectorizer.h (working copy)
> *************** struct _slp_tree {
> *** 107,112 ****
> --- 107,114 ----
> unsigned int vec_stmts_size;
> /* Whether the scalar computations use two different operators. */
> bool two_operators;
> + /* The DEF type of this node. */
> + enum vect_def_type def_type;
> };
>
>
> *************** typedef struct _slp_instance {
> *** 139,144 ****
> --- 141,147 ----
> #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
> #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
> #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
> + #define SLP_TREE_DEF_TYPE(S) (S)->def_type
>
>
>
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c (revision 231552)
> --- gcc/tree-vect-stmts.c (working copy)
> *************** vect_is_simple_use (tree operand, vec_in
> *** 8649,8658 ****
> else
> {
> stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> ! if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
> ! *dt = vect_external_def;
> ! else
> ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> }
>
> if (dump_enabled_p ())
> --- 8652,8658 ----
> else
> {
> stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> }
>
> if (dump_enabled_p ())
> Index: gcc/testsuite/g++.dg/torture/pr68852.C
> ===================================================================
> --- gcc/testsuite/g++.dg/torture/pr68852.C (revision 0)
> +++ gcc/testsuite/g++.dg/torture/pr68852.C (working copy)
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +
> +struct A {
> + double x, y, z, w;
> + A() {}
> + A(double, double p2, double p3, double) : y(p2), z(p3) {}
> + void m_fn1();
> +};
> +
> +struct B {
> + double x, y;
> +};
> +struct D : A {
> + D() {}
> + D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
> +};
> +
> +class C {
> +public:
> + float _11, _12, _13, _14;
> + float _21, _22, _23, _24;
> + float _31, _32, _33, _34;
> + float _41, _42, _43, _44;
> + D m_fn2(B p1) {
> + double z(p1.x + _43);
> + return *this * D(p1.x, p1.y, z, 1);
> + }
> + int ProjectRectBounds_next;
> + B __trans_tmp_3;
> + int m_fn3(int) {
> + B a, b;
> + D c[1];
> + b = __trans_tmp_3;
> + c[2] = m_fn2(b);
> + c[3] = m_fn2(a);
> + c[ProjectRectBounds_next].m_fn1();
> + }
> + D operator*(D p1) {
> + D d;
> + d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
> + d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
> + d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
> + d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
> + return d;
> + }
> +};
> +
> +void fn1() {
> + C e;
> + int f = e.m_fn3(f);
> +}
> Index: gcc/tree-vect-slp.c
> ===================================================================
> *** gcc/tree-vect-slp.c (revision 231610)
> --- gcc/tree-vect-slp.c (working copy)
> *************** vect_free_slp_tree (slp_tree node)
> *** 51,59 ****
> int i;
> slp_tree child;
>
> - if (!node)
> - return;
> -
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> vect_free_slp_tree (child);
>
> --- 51,56 ----
> *************** vect_create_new_slp_node (vec<gimple *>
> *** 103,108 ****
> --- 100,106 ----
> SLP_TREE_CHILDREN (node).create (nops);
> SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
> SLP_TREE_TWO_OPERATORS (node) = false;
> + SLP_TREE_DEF_TYPE (node) = vect_internal_def;
>
> return node;
> }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 938,944 ****
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (grandchild != NULL)
> break;
> if (!grandchild)
> {
> --- 936,942 ----
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> break;
> if (!grandchild)
> {
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 946,960 ****
> *max_nunits = old_max_nunits;
> loads->truncate (old_nloads);
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! vect_free_slp_tree (grandchild);
> SLP_TREE_CHILDREN (child).truncate (0);
>
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! vect_free_slp_tree (child);
> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> continue;
> }
> }
> --- 944,958 ----
> *max_nunits = old_max_nunits;
> loads->truncate (old_nloads);
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! vect_free_slp_tree (grandchild);
> SLP_TREE_CHILDREN (child).truncate (0);
>
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> }
> }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 992,999 ****
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building vector operands from scalars\n");
> oprnd_info->def_stmts = vNULL;
> ! vect_free_slp_tree (child);
> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> continue;
> }
>
> --- 990,997 ----
> dump_printf_loc (MSG_NOTE, vect_location,
> "Building vector operands from scalars\n");
> oprnd_info->def_stmts = vNULL;
> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> }
>
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1044,1049 ****
> --- 1042,1061 ----
> tem, npermutes, &this_tree_size,
> max_tree_size))
> {
> + /* ... so if successful we can apply the operand swapping
> + to the GIMPLE IL. This is necessary because for example
> + vect_get_slp_defs uses operand indexes and thus expects
> + canonical operand order. This is also necessary even
> + if we end up building the operand from scalars as
> + we'll continue to process swapped operand two. */
> + for (j = 0; j < group_size; ++j)
> + if (!matches[j])
> + {
> + gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> + swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> + gimple_assign_rhs2_ptr (stmt));
> + }
> +
> /* If we have all children of child built up from scalars then
> just throw that away and build it up this node from scalars. */
> if (!SLP_TREE_CHILDREN (child).is_empty ())
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1052,1058 ****
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (grandchild != NULL)
> break;
> if (!grandchild)
> {
> --- 1064,1070 ----
> slp_tree grandchild;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> break;
> if (!grandchild)
> {
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1067,1089 ****
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! vect_free_slp_tree (child);
> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> continue;
> }
> }
>
> - /* ... so if successful we can apply the operand swapping
> - to the GIMPLE IL. This is necessary because for example
> - vect_get_slp_defs uses operand indexes and thus expects
> - canonical operand order. */
> - for (j = 0; j < group_size; ++j)
> - if (!matches[j])
> - {
> - gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> - swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> - gimple_assign_rhs2_ptr (stmt));
> - }
> oprnd_info->def_stmts = vNULL;
> SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> --- 1079,1090 ----
> "Building parent vector operands from "
> "scalars instead\n");
> oprnd_info->def_stmts = vNULL;
> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> }
> }
>
> oprnd_info->def_stmts = vNULL;
> SLP_TREE_CHILDREN (*node).quick_push (child);
> continue;
> *************** vect_print_slp_tree (int dump_kind, loca
> *** 1114,1123 ****
> gimple *stmt;
> slp_tree child;
>
> ! if (!node)
> ! return;
> !
> ! dump_printf_loc (dump_kind, loc, "node\n");
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> --- 1115,1123 ----
> gimple *stmt;
> slp_tree child;
>
> ! dump_printf_loc (dump_kind, loc, "node%s\n",
> ! SLP_TREE_DEF_TYPE (node) != vect_internal_def
> ! ? " (external)" : "");
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> *************** vect_mark_slp_stmts (slp_tree node, enum
> *** 1140,1146 ****
> gimple *stmt;
> slp_tree child;
>
> ! if (!node)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1140,1146 ----
> gimple *stmt;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_mark_slp_stmts_relevant (slp_tree n
> *** 1162,1168 ****
> stmt_vec_info stmt_info;
> slp_tree child;
>
> ! if (!node)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1162,1168 ----
> stmt_vec_info stmt_info;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1400,1406 ****
> stmt_vector_for_cost *body_cost_vec,
> unsigned ncopies_for_cost)
> {
> ! unsigned i;
> slp_tree child;
> gimple *stmt, *s;
> stmt_vec_info stmt_info;
> --- 1400,1406 ----
> stmt_vector_for_cost *body_cost_vec,
> unsigned ncopies_for_cost)
> {
> ! unsigned i, j;
> slp_tree child;
> gimple *stmt, *s;
> stmt_vec_info stmt_info;
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1409,1415 ****
>
> /* Recurse down the SLP tree. */
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (child)
> vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> body_cost_vec, ncopies_for_cost);
>
> --- 1409,1415 ----
>
> /* Recurse down the SLP tree. */
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> body_cost_vec, ncopies_for_cost);
>
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1464,1472 ****
> --- 1464,1479 ----
> }
> }
>
> + /* Push SLP node def-type to stmts. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> /* Scan operands and account for prologue cost of constants/externals.
> ??? This over-estimates cost for multiple uses and should be
> re-engineered. */
> + stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> lhs = gimple_get_lhs (stmt);
> for (i = 0; i < gimple_num_ops (stmt); ++i)
> {
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1489,1494 ****
> --- 1496,1507 ----
> stmt_info, 0, vect_prologue);
> }
> }
> +
> + /* Restore stmt def-types. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> }
>
> /* Compute the cost for the SLP instance INSTANCE. */
> *************** vect_analyze_slp_instance (vec_info *vin
> *** 1795,1800 ****
> --- 1808,1840 ----
> }
> }
>
> + /* If the loads and stores can be handled with load/store-lane
> + instructions do not generate this SLP instance. */
> + if (is_a <loop_vec_info> (vinfo)
> + && loads_permuted
> + && dr && vect_store_lanes_supported (vectype, group_size))
> + {
> + slp_tree load_node;
> + FOR_EACH_VEC_ELT (loads, i, load_node)
> + {
> + gimple *first_stmt = GROUP_FIRST_ELEMENT
> + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> + if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
> + GROUP_SIZE (stmt_vinfo)))
> + break;
> + }
> + if (i == loads.length ())
> + {
> + if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + "Built SLP cancelled: can use "
> + "load/store-lanes\n");
> + vect_free_slp_instance (new_instance);
> + return false;
> + }
> + }
> +
> vinfo->slp_instances.safe_push (new_instance);
>
> if (dump_enabled_p ())
> *************** vect_detect_hybrid_slp_stmts (slp_tree n
> *** 2004,2010 ****
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> ! if (child)
> vect_detect_hybrid_slp_stmts (child, i, stype);
> }
>
> --- 2044,2050 ----
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> ! if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
> vect_detect_hybrid_slp_stmts (child, i, stype);
> }
>
> *************** static bool
> *** 2185,2201 ****
> vect_slp_analyze_node_operations (slp_tree node)
> {
> bool dummy;
> ! int i;
> gimple *stmt;
> slp_tree child;
>
> ! if (!node)
> return true;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> if (!vect_slp_analyze_node_operations (child))
> return false;
>
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> --- 2225,2248 ----
> vect_slp_analyze_node_operations (slp_tree node)
> {
> bool dummy;
> ! int i, j;
> gimple *stmt;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return true;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> if (!vect_slp_analyze_node_operations (child))
> return false;
>
> + /* Push SLP node def-type to stmts. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> + bool res = true;
> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> {
> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> *************** vect_slp_analyze_node_operations (slp_tr
> *** 2203,2212 ****
> gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>
> if (!vect_analyze_stmt (stmt, &dummy, node))
> ! return false;
> }
>
> ! return true;
> }
>
>
> --- 2250,2268 ----
> gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>
> if (!vect_analyze_stmt (stmt, &dummy, node))
> ! {
> ! res = false;
> ! break;
> ! }
> }
>
> ! /* Restore stmt def-types. */
> ! FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> ! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> ! STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> !
> ! return res;
> }
>
>
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2286,2292 ****
> if (!is_gimple_debug (use_stmt)
> && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> use_stmt)
> ! || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
> {
> (*life)[i] = true;
> BREAK_FROM_IMM_USE_STMT (use_iter);
> --- 2342,2348 ----
> if (!is_gimple_debug (use_stmt)
> && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> use_stmt)
> ! || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
> {
> (*life)[i] = true;
> BREAK_FROM_IMM_USE_STMT (use_iter);
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2310,2316 ****
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (child)
> scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>
> return scalar_cost;
> --- 2366,2372 ----
> }
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>
> return scalar_cost;
> *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
> *** 2499,2513 ****
> return NULL;
> }
>
> - /* Mark all the statements that we do not want to vectorize. */
> - for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
> - gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
> - {
> - stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
> - if (STMT_SLP_TYPE (vinfo) != pure_slp)
> - STMT_VINFO_VECTORIZABLE (vinfo) = false;
> - }
> -
> if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
> BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
> {
> --- 2555,2560 ----
> *************** vect_get_slp_defs (vec<tree> ops, slp_tr
> *** 3085,3091 ****
> child = SLP_TREE_CHILDREN (slp_node)[child_index];
>
> /* We have to check both pattern and original def, if available. */
> ! if (child)
> {
> gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> gimple *related
> --- 3132,3138 ----
> child = SLP_TREE_CHILDREN (slp_node)[child_index];
>
> /* We have to check both pattern and original def, if available. */
> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> {
> gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> gimple *related
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3374,3388 ****
> stmt_vec_info stmt_info;
> unsigned int vec_stmts_size, nunits, group_size;
> tree vectype;
> ! int i;
> slp_tree child;
>
> ! if (!node)
> return false;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> vect_schedule_slp_instance (child, instance, vectorization_factor);
>
> stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> stmt_info = vinfo_for_stmt (stmt);
>
> --- 3421,3441 ----
> stmt_vec_info stmt_info;
> unsigned int vec_stmts_size, nunits, group_size;
> tree vectype;
> ! int i, j;
> slp_tree child;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return false;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> vect_schedule_slp_instance (child, instance, vectorization_factor);
>
> + /* Push SLP node def-type to stmts. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> stmt_info = vinfo_for_stmt (stmt);
>
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3501,3506 ****
> --- 3554,3566 ----
> }
> }
> is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
> +
> + /* Restore stmt def-types. */
> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> +
> return is_store;
> }
>
> *************** vect_remove_slp_scalar_calls (slp_tree n
> *** 3519,3525 ****
> tree lhs;
> stmt_vec_info stmt_info;
>
> ! if (!node)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> --- 3579,3585 ----
> tree lhs;
> stmt_vec_info stmt_info;
>
> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> return;
>
> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix PR68852
2015-12-17 16:31 ` Kyrill Tkachov
@ 2015-12-18 8:57 ` Richard Biener
2015-12-18 9:09 ` Kyrill Tkachov
0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2015-12-18 8:57 UTC (permalink / raw)
To: Kyrill Tkachov; +Cc: gcc-patches
On Thu, 17 Dec 2015, Kyrill Tkachov wrote:
>
> On 14/12/15 15:14, Richard Biener wrote:
> > The following fixes PR68852 - so I finally needed to sit down and
> > fix the "build-from-scalars" hack in the SLP vectorizer by pretending
> > we'd have a sane vectorizer IL. Basically I now mark the SLP node
> > with a proper vect_def_type but I have to push that down to the
> > stmt-info level whenever something would look at it.
> >
> > It's a bit ugly but not too much yet ;)
> >
> > Anyway, the proper fix is to have a sane data structure, nothing for
> > GCC 6 though.
> >
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> >
> > Verified SPEC CPU 2006 is happy with the patch.
>
> Unfortunately it's not very happy on aarch64 ;)
> 416.gamess (trans.fppized.f in particular) ICEs after this patch with
>
> trans.fppized.f:2086:0:
>
> SUBROUTINE TRFMCX(NPRINT,ICORBS,IORBS,IORB,DOFOCK,DOEXCH,
>
>
> internal compiler error: in vect_analyze_stmt, at tree-vect-stmts.c:8013
> 0xd34d1b vect_analyze_stmt(gimple*, bool*, _slp_tree*)
> $SRC/tree-vect-stmts.c:8013
> 0xd4b64a vect_slp_analyze_node_operations
> $SRC/tree-vect-slp.c:2237
> 0xd4b533 vect_slp_analyze_node_operations
> $SRC/tree-vect-slp.c:2221
> 0xd4b533 vect_slp_analyze_node_operations
> $SRC/tree-vect-slp.c:2221
> 0xd4b533 vect_slp_analyze_node_operations
> $SRC/tree-vect-slp.c:2221
> 0xd4b533 vect_slp_analyze_node_operations
> $SRC/tree-vect-slp.c:2221
> 0xd4f7dc vect_slp_analyze_operations(vec<_slp_instance*, va_heap, vl_ptr>,
> void*)
> $SRC/tree-vect-slp.c:2269
> 0xd546a0 vect_slp_analyze_bb_1
> $SRC/tree-vect-slp.c:2543
> 0xd546a0 vect_slp_bb(basic_block_def*)
> $SRC/tree-vect-slp.c:2630
> 0xd56985 execute
> $SRC/tree-vectorizer.c:759
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See <http://gcc.gnu.org/bugs.html> for instructions.
>
> when using the flags
> -mcpu=cortex-a53+crypto -save-temps -Ofast -fomit-frame-pointer
> -fno-aggressive-loop-optimizations
>
> I'll open a bug report to keep track of it.
This sounds like PR68946 which I just fixed?
Richard.
> Thanks,
> Kyrill
>
> > Richard.
> >
> > 2015-12-14 Richard Biener <rguenther@suse.de>
> >
> > PR tree-optimization/68852
> > * tree-vectorizer.h (struct _slp_tree): Add def_type member.
> > (SLP_TREE_DEF_TYPE): New accessor.
> > * tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
> > hack.
> > * tree-vect-slp.c (vect_create_new_slp_node): Initialize
> > SLP_TREE_DEF_TYPE.
> > (vect_build_slp_tree): When a node is to be built up from scalars
> > do not push a NULL as child but instead set its def_type to
> > vect_external_def.
> > (vect_analyze_slp_cost_1): Check for child def-type instead
> > of NULL.
> > (vect_detect_hybrid_slp_stmts): Likewise.
> > (vect_bb_slp_scalar_cost): Likewise.
> > (vect_get_slp_defs): Likewise.
> > (vect_slp_analyze_node_operations): Likewise. Before
> > processing node push the children def-types to the underlying
> > stmts vinfo and restore it afterwards.
> > (vect_schedule_slp_instance): Likewise.
> > (vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
> > as not vectorizable.
> >
> > * g++.dg/torture/pr68852.C: New testcase.
> >
> > Index: gcc/tree-vectorizer.h
> > ===================================================================
> > *** gcc/tree-vectorizer.h (revision 231552)
> > --- gcc/tree-vectorizer.h (working copy)
> > *************** struct _slp_tree {
> > *** 107,112 ****
> > --- 107,114 ----
> > unsigned int vec_stmts_size;
> > /* Whether the scalar computations use two different operators. */
> > bool two_operators;
> > + /* The DEF type of this node. */
> > + enum vect_def_type def_type;
> > };
> > *************** typedef struct _slp_instance {
> > *** 139,144 ****
> > --- 141,147 ----
> > #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
> > #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
> > #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
> > + #define SLP_TREE_DEF_TYPE(S) (S)->def_type
> > Index: gcc/tree-vect-stmts.c
> > ===================================================================
> > *** gcc/tree-vect-stmts.c (revision 231552)
> > --- gcc/tree-vect-stmts.c (working copy)
> > *************** vect_is_simple_use (tree operand, vec_in
> > *** 8649,8658 ****
> > else
> > {
> > stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> > ! if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE
> > (stmt_vinfo))
> > ! *dt = vect_external_def;
> > ! else
> > ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> > }
> > if (dump_enabled_p ())
> > --- 8652,8658 ----
> > else
> > {
> > stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> > ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> > }
> > if (dump_enabled_p ())
> > Index: gcc/testsuite/g++.dg/torture/pr68852.C
> > ===================================================================
> > --- gcc/testsuite/g++.dg/torture/pr68852.C (revision 0)
> > +++ gcc/testsuite/g++.dg/torture/pr68852.C (working copy)
> > @@ -0,0 +1,51 @@
> > +/* { dg-do compile } */
> > +
> > +struct A {
> > + double x, y, z, w;
> > + A() {}
> > + A(double, double p2, double p3, double) : y(p2), z(p3) {}
> > + void m_fn1();
> > +};
> > +
> > +struct B {
> > + double x, y;
> > +};
> > +struct D : A {
> > + D() {}
> > + D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
> > +};
> > +
> > +class C {
> > +public:
> > + float _11, _12, _13, _14;
> > + float _21, _22, _23, _24;
> > + float _31, _32, _33, _34;
> > + float _41, _42, _43, _44;
> > + D m_fn2(B p1) {
> > + double z(p1.x + _43);
> > + return *this * D(p1.x, p1.y, z, 1);
> > + }
> > + int ProjectRectBounds_next;
> > + B __trans_tmp_3;
> > + int m_fn3(int) {
> > + B a, b;
> > + D c[1];
> > + b = __trans_tmp_3;
> > + c[2] = m_fn2(b);
> > + c[3] = m_fn2(a);
> > + c[ProjectRectBounds_next].m_fn1();
> > + }
> > + D operator*(D p1) {
> > + D d;
> > + d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
> > + d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
> > + d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
> > + d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
> > + return d;
> > + }
> > +};
> > +
> > +void fn1() {
> > + C e;
> > + int f = e.m_fn3(f);
> > +}
> > Index: gcc/tree-vect-slp.c
> > ===================================================================
> > *** gcc/tree-vect-slp.c (revision 231610)
> > --- gcc/tree-vect-slp.c (working copy)
> > *************** vect_free_slp_tree (slp_tree node)
> > *** 51,59 ****
> > int i;
> > slp_tree child;
> > - if (!node)
> > - return;
> > -
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > vect_free_slp_tree (child);
> > --- 51,56 ----
> > *************** vect_create_new_slp_node (vec<gimple *>
> > *** 103,108 ****
> > --- 100,106 ----
> > SLP_TREE_CHILDREN (node).create (nops);
> > SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
> > SLP_TREE_TWO_OPERATORS (node) = false;
> > + SLP_TREE_DEF_TYPE (node) = vect_internal_def;
> > return node;
> > }
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 938,944 ****
> > slp_tree grandchild;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! if (grandchild != NULL)
> > break;
> > if (!grandchild)
> > {
> > --- 936,942 ----
> > slp_tree grandchild;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> > break;
> > if (!grandchild)
> > {
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 946,960 ****
> > *max_nunits = old_max_nunits;
> > loads->truncate (old_nloads);
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> > ! vect_free_slp_tree (grandchild);
> > SLP_TREE_CHILDREN (child).truncate (0);
> > dump_printf_loc (MSG_NOTE, vect_location,
> > "Building parent vector operands from "
> > "scalars instead\n");
> > oprnd_info->def_stmts = vNULL;
> > ! vect_free_slp_tree (child);
> > ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> > continue;
> > }
> > }
> > --- 944,958 ----
> > *max_nunits = old_max_nunits;
> > loads->truncate (old_nloads);
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> > ! vect_free_slp_tree (grandchild);
> > SLP_TREE_CHILDREN (child).truncate (0);
> > dump_printf_loc (MSG_NOTE, vect_location,
> > "Building parent vector operands from "
> > "scalars instead\n");
> > oprnd_info->def_stmts = vNULL;
> > ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> > ! SLP_TREE_CHILDREN (*node).quick_push (child);
> > continue;
> > }
> > }
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 992,999 ****
> > dump_printf_loc (MSG_NOTE, vect_location,
> > "Building vector operands from scalars\n");
> > oprnd_info->def_stmts = vNULL;
> > ! vect_free_slp_tree (child);
> > ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> > continue;
> > }
> > --- 990,997 ----
> > dump_printf_loc (MSG_NOTE, vect_location,
> > "Building vector operands from scalars\n");
> > oprnd_info->def_stmts = vNULL;
> > ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> > ! SLP_TREE_CHILDREN (*node).quick_push (child);
> > continue;
> > }
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 1044,1049 ****
> > --- 1042,1061 ----
> > tem, npermutes, &this_tree_size,
> > max_tree_size))
> > {
> > + /* ... so if successful we can apply the operand swapping
> > + to the GIMPLE IL. This is necessary because for example
> > + vect_get_slp_defs uses operand indexes and thus expects
> > + canonical operand order. This is also necessary even
> > + if we end up building the operand from scalars as
> > + we'll continue to process swapped operand two. */
> > + for (j = 0; j < group_size; ++j)
> > + if (!matches[j])
> > + {
> > + gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> > + swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> > + gimple_assign_rhs2_ptr (stmt));
> > + }
> > +
> > /* If we have all children of child built up from scalars then
> > just throw that away and build it up this node from scalars.
> > */
> > if (!SLP_TREE_CHILDREN (child).is_empty ())
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 1052,1058 ****
> > slp_tree grandchild;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! if (grandchild != NULL)
> > break;
> > if (!grandchild)
> > {
> > --- 1064,1070 ----
> > slp_tree grandchild;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> > break;
> > if (!grandchild)
> > {
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 1067,1089 ****
> > "Building parent vector operands from "
> > "scalars instead\n");
> > oprnd_info->def_stmts = vNULL;
> > ! vect_free_slp_tree (child);
> > ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
> > continue;
> > }
> > }
> > - /* ... so if successful we can apply the operand swapping
> > - to the GIMPLE IL. This is necessary because for example
> > - vect_get_slp_defs uses operand indexes and thus expects
> > - canonical operand order. */
> > - for (j = 0; j < group_size; ++j)
> > - if (!matches[j])
> > - {
> > - gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> > - swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> > - gimple_assign_rhs2_ptr (stmt));
> > - }
> > oprnd_info->def_stmts = vNULL;
> > SLP_TREE_CHILDREN (*node).quick_push (child);
> > continue;
> > --- 1079,1090 ----
> > "Building parent vector operands from "
> > "scalars instead\n");
> > oprnd_info->def_stmts = vNULL;
> > ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
> > ! SLP_TREE_CHILDREN (*node).quick_push (child);
> > continue;
> > }
> > }
> > oprnd_info->def_stmts = vNULL;
> > SLP_TREE_CHILDREN (*node).quick_push (child);
> > continue;
> > *************** vect_print_slp_tree (int dump_kind, loca
> > *** 1114,1123 ****
> > gimple *stmt;
> > slp_tree child;
> > ! if (!node)
> > ! return;
> > !
> > ! dump_printf_loc (dump_kind, loc, "node\n");
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > {
> > dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> > --- 1115,1123 ----
> > gimple *stmt;
> > slp_tree child;
> > ! dump_printf_loc (dump_kind, loc, "node%s\n",
> > ! SLP_TREE_DEF_TYPE (node) != vect_internal_def
> > ! ? " (external)" : "");
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > {
> > dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> > *************** vect_mark_slp_stmts (slp_tree node, enum
> > *** 1140,1146 ****
> > gimple *stmt;
> > slp_tree child;
> > ! if (!node)
> > return;
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > --- 1140,1146 ----
> > gimple *stmt;
> > slp_tree child;
> > ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> > return;
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > *************** vect_mark_slp_stmts_relevant (slp_tree n
> > *** 1162,1168 ****
> > stmt_vec_info stmt_info;
> > slp_tree child;
> > ! if (!node)
> > return;
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > --- 1162,1168 ----
> > stmt_vec_info stmt_info;
> > slp_tree child;
> > ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> > return;
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1400,1406 ****
> > stmt_vector_for_cost *body_cost_vec,
> > unsigned ncopies_for_cost)
> > {
> > ! unsigned i;
> > slp_tree child;
> > gimple *stmt, *s;
> > stmt_vec_info stmt_info;
> > --- 1400,1406 ----
> > stmt_vector_for_cost *body_cost_vec,
> > unsigned ncopies_for_cost)
> > {
> > ! unsigned i, j;
> > slp_tree child;
> > gimple *stmt, *s;
> > stmt_vec_info stmt_info;
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1409,1415 ****
> > /* Recurse down the SLP tree. */
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > ! if (child)
> > vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> > body_cost_vec, ncopies_for_cost);
> > --- 1409,1415 ----
> > /* Recurse down the SLP tree. */
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> > vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> > body_cost_vec, ncopies_for_cost);
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1464,1472 ****
> > --- 1464,1479 ----
> > }
> > }
> > + /* Push SLP node def-type to stmts. */
> > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
> > (child);
> > +
> > /* Scan operands and account for prologue cost of constants/externals.
> > ??? This over-estimates cost for multiple uses and should be
> > re-engineered. */
> > + stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> > lhs = gimple_get_lhs (stmt);
> > for (i = 0; i < gimple_num_ops (stmt); ++i)
> > {
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1489,1494 ****
> > --- 1496,1507 ----
> > stmt_info, 0, vect_prologue);
> > }
> > }
> > +
> > + /* Restore stmt def-types. */
> > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> > }
> > /* Compute the cost for the SLP instance INSTANCE. */
> > *************** vect_analyze_slp_instance (vec_info *vin
> > *** 1795,1800 ****
> > --- 1808,1840 ----
> > }
> > }
> > + /* If the loads and stores can be handled with load/store-lane
> > + instructions do not generate this SLP instance. */
> > + if (is_a <loop_vec_info> (vinfo)
> > + && loads_permuted
> > + && dr && vect_store_lanes_supported (vectype, group_size))
> > + {
> > + slp_tree load_node;
> > + FOR_EACH_VEC_ELT (loads, i, load_node)
> > + {
> > + gimple *first_stmt = GROUP_FIRST_ELEMENT
> > + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> > + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> > + if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE
> > (stmt_vinfo),
> > + GROUP_SIZE (stmt_vinfo)))
> > + break;
> > + }
> > + if (i == loads.length ())
> > + {
> > + if (dump_enabled_p ())
> > + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + "Built SLP cancelled: can use "
> > + "load/store-lanes\n");
> > + vect_free_slp_instance (new_instance);
> > + return false;
> > + }
> > + }
> > +
> > vinfo->slp_instances.safe_push (new_instance);
> > if (dump_enabled_p ())
> > *************** vect_detect_hybrid_slp_stmts (slp_tree n
> > *** 2004,2010 ****
> > }
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> > ! if (child)
> > vect_detect_hybrid_slp_stmts (child, i, stype);
> > }
> > --- 2044,2050 ----
> > }
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> > ! if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
> > vect_detect_hybrid_slp_stmts (child, i, stype);
> > }
> > *************** static bool
> > *** 2185,2201 ****
> > vect_slp_analyze_node_operations (slp_tree node)
> > {
> > bool dummy;
> > ! int i;
> > gimple *stmt;
> > slp_tree child;
> > ! if (!node)
> > return true;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > if (!vect_slp_analyze_node_operations (child))
> > return false;
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > {
> > stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> > --- 2225,2248 ----
> > vect_slp_analyze_node_operations (slp_tree node)
> > {
> > bool dummy;
> > ! int i, j;
> > gimple *stmt;
> > slp_tree child;
> > ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> > return true;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > if (!vect_slp_analyze_node_operations (child))
> > return false;
> > + /* Push SLP node def-type to stmts. */
> > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
> > (child);
> > +
> > + bool res = true;
> > FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > {
> > stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> > *************** vect_slp_analyze_node_operations (slp_tr
> > *** 2203,2212 ****
> > gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
> > if (!vect_analyze_stmt (stmt, &dummy, node))
> > ! return false;
> > }
> > ! return true;
> > }
> > --- 2250,2268 ----
> > gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
> > if (!vect_analyze_stmt (stmt, &dummy, node))
> > ! {
> > ! res = false;
> > ! break;
> > ! }
> > }
> > ! /* Restore stmt def-types. */
> > ! FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > ! if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > ! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > ! STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> > !
> > ! return res;
> > }
> > *************** vect_bb_slp_scalar_cost (basic_block bb,
> > *** 2286,2292 ****
> > if (!is_gimple_debug (use_stmt)
> > && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> > use_stmt)
> > ! || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
> > {
> > (*life)[i] = true;
> > BREAK_FROM_IMM_USE_STMT (use_iter);
> > --- 2342,2348 ----
> > if (!is_gimple_debug (use_stmt)
> > && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> > use_stmt)
> > ! || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
> > {
> > (*life)[i] = true;
> > BREAK_FROM_IMM_USE_STMT (use_iter);
> > *************** vect_bb_slp_scalar_cost (basic_block bb,
> > *** 2310,2316 ****
> > }
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > ! if (child)
> > scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
> > return scalar_cost;
> > --- 2366,2372 ----
> > }
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> > scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
> > return scalar_cost;
> > *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
> > *** 2499,2513 ****
> > return NULL;
> > }
> > - /* Mark all the statements that we do not want to vectorize. */
> > - for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
> > - gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
> > - {
> > - stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
> > - if (STMT_SLP_TYPE (vinfo) != pure_slp)
> > - STMT_VINFO_VECTORIZABLE (vinfo) = false;
> > - }
> > -
> > if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
> > BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
> > {
> > --- 2555,2560 ----
> > *************** vect_get_slp_defs (vec<tree> ops, slp_tr
> > *** 3085,3091 ****
> > child = SLP_TREE_CHILDREN (slp_node)[child_index];
> > /* We have to check both pattern and original def, if
> > available. */
> > ! if (child)
> > {
> > gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> > gimple *related
> > --- 3132,3138 ----
> > child = SLP_TREE_CHILDREN (slp_node)[child_index];
> > /* We have to check both pattern and original def, if
> > available. */
> > ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> > {
> > gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> > gimple *related
> > *************** vect_schedule_slp_instance (slp_tree nod
> > *** 3374,3388 ****
> > stmt_vec_info stmt_info;
> > unsigned int vec_stmts_size, nunits, group_size;
> > tree vectype;
> > ! int i;
> > slp_tree child;
> > ! if (!node)
> > return false;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > vect_schedule_slp_instance (child, instance, vectorization_factor);
> > stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> > stmt_info = vinfo_for_stmt (stmt);
> > --- 3421,3441 ----
> > stmt_vec_info stmt_info;
> > unsigned int vec_stmts_size, nunits, group_size;
> > tree vectype;
> > ! int i, j;
> > slp_tree child;
> > ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> > return false;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > vect_schedule_slp_instance (child, instance, vectorization_factor);
> > + /* Push SLP node def-type to stmts. */
> > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
> > (child);
> > +
> > stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> > stmt_info = vinfo_for_stmt (stmt);
> > *************** vect_schedule_slp_instance (slp_tree nod
> > *** 3501,3506 ****
> > --- 3554,3566 ----
> > }
> > }
> > is_store = vect_transform_stmt (stmt, &si, &grouped_store, node,
> > instance);
> > +
> > + /* Restore stmt def-types. */
> > + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> > +
> > return is_store;
> > }
> > *************** vect_remove_slp_scalar_calls (slp_tree n
> > *** 3519,3525 ****
> > tree lhs;
> > stmt_vec_info stmt_info;
> > ! if (!node)
> > return;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > --- 3579,3585 ----
> > tree lhs;
> > stmt_vec_info stmt_info;
> > ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> > return;
> > FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> >
>
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] Fix PR68852
2015-12-18 8:57 ` Richard Biener
@ 2015-12-18 9:09 ` Kyrill Tkachov
0 siblings, 0 replies; 5+ messages in thread
From: Kyrill Tkachov @ 2015-12-18 9:09 UTC (permalink / raw)
To: Richard Biener; +Cc: gcc-patches
On 18/12/15 08:57, Richard Biener wrote:
> On Thu, 17 Dec 2015, Kyrill Tkachov wrote:
>
>> On 14/12/15 15:14, Richard Biener wrote:
>>> The following fixes PR68852 - so I finally needed to sit down and
>>> fix the "build-from-scalars" hack in the SLP vectorizer by pretending
>>> we'd have a sane vectorizer IL. Basically I now mark the SLP node
>>> with a proper vect_def_type but I have to push that down to the
>>> stmt-info level whenever something would look at it.
>>>
>>> It's a bit ugly but not too much yet ;)
>>>
>>> Anyway, the proper fix is to have a sane data structure, nothing for
>>> GCC 6 though.
>>>
>>> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>>>
>>> Verified SPEC CPU 2006 is happy with the patch.
>> Unfortunately it's not very happy on aarch64 ;)
>> 416.gamess (trans.fppized.f in particular) ICEs after this patch with
>>
>> trans.fppized.f:2086:0:
>>
>> SUBROUTINE TRFMCX(NPRINT,ICORBS,IORBS,IORB,DOFOCK,DOEXCH,
>>
>>
>> internal compiler error: in vect_analyze_stmt, at tree-vect-stmts.c:8013
>> 0xd34d1b vect_analyze_stmt(gimple*, bool*, _slp_tree*)
>> $SRC/tree-vect-stmts.c:8013
>> 0xd4b64a vect_slp_analyze_node_operations
>> $SRC/tree-vect-slp.c:2237
>> 0xd4b533 vect_slp_analyze_node_operations
>> $SRC/tree-vect-slp.c:2221
>> 0xd4b533 vect_slp_analyze_node_operations
>> $SRC/tree-vect-slp.c:2221
>> 0xd4b533 vect_slp_analyze_node_operations
>> $SRC/tree-vect-slp.c:2221
>> 0xd4b533 vect_slp_analyze_node_operations
>> $SRC/tree-vect-slp.c:2221
>> 0xd4f7dc vect_slp_analyze_operations(vec<_slp_instance*, va_heap, vl_ptr>,
>> void*)
>> $SRC/tree-vect-slp.c:2269
>> 0xd546a0 vect_slp_analyze_bb_1
>> $SRC/tree-vect-slp.c:2543
>> 0xd546a0 vect_slp_bb(basic_block_def*)
>> $SRC/tree-vect-slp.c:2630
>> 0xd56985 execute
>> $SRC/tree-vectorizer.c:759
>> Please submit a full bug report,
>> with preprocessed source if appropriate.
>> Please include the complete backtrace with any bug report.
>> See <http://gcc.gnu.org/bugs.html> for instructions.
>>
>> when using the flags
>> -mcpu=cortex-a53+crypto -save-temps -Ofast -fomit-frame-pointer
>> -fno-aggressive-loop-optimizations
>>
>> I'll open a bug report to keep track of it.
> This sounds like PR68946 which I just fixed?
Looks like it. Latest trunk does not ICE.
Sorry for the noise.
Kyrill
> Richard.
>
>> Thanks,
>> Kyrill
>>
>>> Richard.
>>>
>>> 2015-12-14 Richard Biener <rguenther@suse.de>
>>>
>>> PR tree-optimization/68852
>>> * tree-vectorizer.h (struct _slp_tree): Add def_type member.
>>> (SLP_TREE_DEF_TYPE): New accessor.
>>> * tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
>>> hack.
>>> * tree-vect-slp.c (vect_create_new_slp_node): Initialize
>>> SLP_TREE_DEF_TYPE.
>>> (vect_build_slp_tree): When a node is to be built up from scalars
>>> do not push a NULL as child but instead set its def_type to
>>> vect_external_def.
>>> (vect_analyze_slp_cost_1): Check for child def-type instead
>>> of NULL.
>>> (vect_detect_hybrid_slp_stmts): Likewise.
>>> (vect_bb_slp_scalar_cost): Likewise.
>>> (vect_get_slp_defs): Likewise.
>>> (vect_slp_analyze_node_operations): Likewise. Before
>>> processing node push the children def-types to the underlying
>>> stmts vinfo and restore it afterwards.
>>> (vect_schedule_slp_instance): Likewise.
>>> (vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
>>> as not vectorizable.
>>>
>>> * g++.dg/torture/pr68852.C: New testcase.
>>>
>>> Index: gcc/tree-vectorizer.h
>>> ===================================================================
>>> *** gcc/tree-vectorizer.h (revision 231552)
>>> --- gcc/tree-vectorizer.h (working copy)
>>> *************** struct _slp_tree {
>>> *** 107,112 ****
>>> --- 107,114 ----
>>> unsigned int vec_stmts_size;
>>> /* Whether the scalar computations use two different operators. */
>>> bool two_operators;
>>> + /* The DEF type of this node. */
>>> + enum vect_def_type def_type;
>>> };
>>> *************** typedef struct _slp_instance {
>>> *** 139,144 ****
>>> --- 141,147 ----
>>> #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size
>>> #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation
>>> #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators
>>> + #define SLP_TREE_DEF_TYPE(S) (S)->def_type
>>> Index: gcc/tree-vect-stmts.c
>>> ===================================================================
>>> *** gcc/tree-vect-stmts.c (revision 231552)
>>> --- gcc/tree-vect-stmts.c (working copy)
>>> *************** vect_is_simple_use (tree operand, vec_in
>>> *** 8649,8658 ****
>>> else
>>> {
>>> stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
>>> ! if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE
>>> (stmt_vinfo))
>>> ! *dt = vect_external_def;
>>> ! else
>>> ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>>> }
>>> if (dump_enabled_p ())
>>> --- 8652,8658 ----
>>> else
>>> {
>>> stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
>>> ! *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>>> }
>>> if (dump_enabled_p ())
>>> Index: gcc/testsuite/g++.dg/torture/pr68852.C
>>> ===================================================================
>>> --- gcc/testsuite/g++.dg/torture/pr68852.C (revision 0)
>>> +++ gcc/testsuite/g++.dg/torture/pr68852.C (working copy)
>>> @@ -0,0 +1,51 @@
>>> +/* { dg-do compile } */
>>> +
>>> +struct A {
>>> + double x, y, z, w;
>>> + A() {}
>>> + A(double, double p2, double p3, double) : y(p2), z(p3) {}
>>> + void m_fn1();
>>> +};
>>> +
>>> +struct B {
>>> + double x, y;
>>> +};
>>> +struct D : A {
>>> + D() {}
>>> + D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
>>> +};
>>> +
>>> +class C {
>>> +public:
>>> + float _11, _12, _13, _14;
>>> + float _21, _22, _23, _24;
>>> + float _31, _32, _33, _34;
>>> + float _41, _42, _43, _44;
>>> + D m_fn2(B p1) {
>>> + double z(p1.x + _43);
>>> + return *this * D(p1.x, p1.y, z, 1);
>>> + }
>>> + int ProjectRectBounds_next;
>>> + B __trans_tmp_3;
>>> + int m_fn3(int) {
>>> + B a, b;
>>> + D c[1];
>>> + b = __trans_tmp_3;
>>> + c[2] = m_fn2(b);
>>> + c[3] = m_fn2(a);
>>> + c[ProjectRectBounds_next].m_fn1();
>>> + }
>>> + D operator*(D p1) {
>>> + D d;
>>> + d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
>>> + d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
>>> + d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
>>> + d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
>>> + return d;
>>> + }
>>> +};
>>> +
>>> +void fn1() {
>>> + C e;
>>> + int f = e.m_fn3(f);
>>> +}
>>> Index: gcc/tree-vect-slp.c
>>> ===================================================================
>>> *** gcc/tree-vect-slp.c (revision 231610)
>>> --- gcc/tree-vect-slp.c (working copy)
>>> *************** vect_free_slp_tree (slp_tree node)
>>> *** 51,59 ****
>>> int i;
>>> slp_tree child;
>>> - if (!node)
>>> - return;
>>> -
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> vect_free_slp_tree (child);
>>> --- 51,56 ----
>>> *************** vect_create_new_slp_node (vec<gimple *>
>>> *** 103,108 ****
>>> --- 100,106 ----
>>> SLP_TREE_CHILDREN (node).create (nops);
>>> SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
>>> SLP_TREE_TWO_OPERATORS (node) = false;
>>> + SLP_TREE_DEF_TYPE (node) = vect_internal_def;
>>> return node;
>>> }
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 938,944 ****
>>> slp_tree grandchild;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! if (grandchild != NULL)
>>> break;
>>> if (!grandchild)
>>> {
>>> --- 936,942 ----
>>> slp_tree grandchild;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>>> break;
>>> if (!grandchild)
>>> {
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 946,960 ****
>>> *max_nunits = old_max_nunits;
>>> loads->truncate (old_nloads);
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
>>> ! vect_free_slp_tree (grandchild);
>>> SLP_TREE_CHILDREN (child).truncate (0);
>>> dump_printf_loc (MSG_NOTE, vect_location,
>>> "Building parent vector operands from "
>>> "scalars instead\n");
>>> oprnd_info->def_stmts = vNULL;
>>> ! vect_free_slp_tree (child);
>>> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
>>> continue;
>>> }
>>> }
>>> --- 944,958 ----
>>> *max_nunits = old_max_nunits;
>>> loads->truncate (old_nloads);
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
>>> ! vect_free_slp_tree (grandchild);
>>> SLP_TREE_CHILDREN (child).truncate (0);
>>> dump_printf_loc (MSG_NOTE, vect_location,
>>> "Building parent vector operands from "
>>> "scalars instead\n");
>>> oprnd_info->def_stmts = vNULL;
>>> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
>>> ! SLP_TREE_CHILDREN (*node).quick_push (child);
>>> continue;
>>> }
>>> }
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 992,999 ****
>>> dump_printf_loc (MSG_NOTE, vect_location,
>>> "Building vector operands from scalars\n");
>>> oprnd_info->def_stmts = vNULL;
>>> ! vect_free_slp_tree (child);
>>> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
>>> continue;
>>> }
>>> --- 990,997 ----
>>> dump_printf_loc (MSG_NOTE, vect_location,
>>> "Building vector operands from scalars\n");
>>> oprnd_info->def_stmts = vNULL;
>>> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
>>> ! SLP_TREE_CHILDREN (*node).quick_push (child);
>>> continue;
>>> }
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 1044,1049 ****
>>> --- 1042,1061 ----
>>> tem, npermutes, &this_tree_size,
>>> max_tree_size))
>>> {
>>> + /* ... so if successful we can apply the operand swapping
>>> + to the GIMPLE IL. This is necessary because for example
>>> + vect_get_slp_defs uses operand indexes and thus expects
>>> + canonical operand order. This is also necessary even
>>> + if we end up building the operand from scalars as
>>> + we'll continue to process swapped operand two. */
>>> + for (j = 0; j < group_size; ++j)
>>> + if (!matches[j])
>>> + {
>>> + gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
>>> + swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
>>> + gimple_assign_rhs2_ptr (stmt));
>>> + }
>>> +
>>> /* If we have all children of child built up from scalars then
>>> just throw that away and build it up this node from scalars.
>>> */
>>> if (!SLP_TREE_CHILDREN (child).is_empty ())
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 1052,1058 ****
>>> slp_tree grandchild;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! if (grandchild != NULL)
>>> break;
>>> if (!grandchild)
>>> {
>>> --- 1064,1070 ----
>>> slp_tree grandchild;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>>> break;
>>> if (!grandchild)
>>> {
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 1067,1089 ****
>>> "Building parent vector operands from "
>>> "scalars instead\n");
>>> oprnd_info->def_stmts = vNULL;
>>> ! vect_free_slp_tree (child);
>>> ! SLP_TREE_CHILDREN (*node).quick_push (NULL);
>>> continue;
>>> }
>>> }
>>> - /* ... so if successful we can apply the operand swapping
>>> - to the GIMPLE IL. This is necessary because for example
>>> - vect_get_slp_defs uses operand indexes and thus expects
>>> - canonical operand order. */
>>> - for (j = 0; j < group_size; ++j)
>>> - if (!matches[j])
>>> - {
>>> - gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
>>> - swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
>>> - gimple_assign_rhs2_ptr (stmt));
>>> - }
>>> oprnd_info->def_stmts = vNULL;
>>> SLP_TREE_CHILDREN (*node).quick_push (child);
>>> continue;
>>> --- 1079,1090 ----
>>> "Building parent vector operands from "
>>> "scalars instead\n");
>>> oprnd_info->def_stmts = vNULL;
>>> ! SLP_TREE_DEF_TYPE (child) = vect_external_def;
>>> ! SLP_TREE_CHILDREN (*node).quick_push (child);
>>> continue;
>>> }
>>> }
>>> oprnd_info->def_stmts = vNULL;
>>> SLP_TREE_CHILDREN (*node).quick_push (child);
>>> continue;
>>> *************** vect_print_slp_tree (int dump_kind, loca
>>> *** 1114,1123 ****
>>> gimple *stmt;
>>> slp_tree child;
>>> ! if (!node)
>>> ! return;
>>> !
>>> ! dump_printf_loc (dump_kind, loc, "node\n");
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> {
>>> dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
>>> --- 1115,1123 ----
>>> gimple *stmt;
>>> slp_tree child;
>>> ! dump_printf_loc (dump_kind, loc, "node%s\n",
>>> ! SLP_TREE_DEF_TYPE (node) != vect_internal_def
>>> ! ? " (external)" : "");
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> {
>>> dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
>>> *************** vect_mark_slp_stmts (slp_tree node, enum
>>> *** 1140,1146 ****
>>> gimple *stmt;
>>> slp_tree child;
>>> ! if (!node)
>>> return;
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> --- 1140,1146 ----
>>> gimple *stmt;
>>> slp_tree child;
>>> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>> return;
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> *************** vect_mark_slp_stmts_relevant (slp_tree n
>>> *** 1162,1168 ****
>>> stmt_vec_info stmt_info;
>>> slp_tree child;
>>> ! if (!node)
>>> return;
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> --- 1162,1168 ----
>>> stmt_vec_info stmt_info;
>>> slp_tree child;
>>> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>> return;
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1400,1406 ****
>>> stmt_vector_for_cost *body_cost_vec,
>>> unsigned ncopies_for_cost)
>>> {
>>> ! unsigned i;
>>> slp_tree child;
>>> gimple *stmt, *s;
>>> stmt_vec_info stmt_info;
>>> --- 1400,1406 ----
>>> stmt_vector_for_cost *body_cost_vec,
>>> unsigned ncopies_for_cost)
>>> {
>>> ! unsigned i, j;
>>> slp_tree child;
>>> gimple *stmt, *s;
>>> stmt_vec_info stmt_info;
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1409,1415 ****
>>> /* Recurse down the SLP tree. */
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> ! if (child)
>>> vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>>> body_cost_vec, ncopies_for_cost);
>>> --- 1409,1415 ----
>>> /* Recurse down the SLP tree. */
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>>> vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>>> body_cost_vec, ncopies_for_cost);
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1464,1472 ****
>>> --- 1464,1479 ----
>>> }
>>> }
>>> + /* Push SLP node def-type to stmts. */
>>> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
>>> +
>>> /* Scan operands and account for prologue cost of constants/externals.
>>> ??? This over-estimates cost for multiple uses and should be
>>> re-engineered. */
>>> + stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>>> lhs = gimple_get_lhs (stmt);
>>> for (i = 0; i < gimple_num_ops (stmt); ++i)
>>> {
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1489,1494 ****
>>> --- 1496,1507 ----
>>> stmt_info, 0, vect_prologue);
>>> }
>>> }
>>> +
>>> + /* Restore stmt def-types. */
>>> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>>> }
>>> /* Compute the cost for the SLP instance INSTANCE. */
>>> *************** vect_analyze_slp_instance (vec_info *vin
>>> *** 1795,1800 ****
>>> --- 1808,1840 ----
>>> }
>>> }
>>> + /* If the loads and stores can be handled with load/store-lane
>>> + instructions do not generate this SLP instance. */
>>> + if (is_a <loop_vec_info> (vinfo)
>>> + && loads_permuted
>>> + && dr && vect_store_lanes_supported (vectype, group_size))
>>> + {
>>> + slp_tree load_node;
>>> + FOR_EACH_VEC_ELT (loads, i, load_node)
>>> + {
>>> + gimple *first_stmt = GROUP_FIRST_ELEMENT
>>> + (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
>>> + stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
>>> + if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
>>> + GROUP_SIZE (stmt_vinfo)))
>>> + break;
>>> + }
>>> + if (i == loads.length ())
>>> + {
>>> + if (dump_enabled_p ())
>>> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> + "Built SLP cancelled: can use "
>>> + "load/store-lanes\n");
>>> + vect_free_slp_instance (new_instance);
>>> + return false;
>>> + }
>>> + }
>>> +
>>> vinfo->slp_instances.safe_push (new_instance);
>>> if (dump_enabled_p ())
>>> *************** vect_detect_hybrid_slp_stmts (slp_tree n
>>> *** 2004,2010 ****
>>> }
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
>>> ! if (child)
>>> vect_detect_hybrid_slp_stmts (child, i, stype);
>>> }
>>> --- 2044,2050 ----
>>> }
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
>>> ! if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
>>> vect_detect_hybrid_slp_stmts (child, i, stype);
>>> }
>>> *************** static bool
>>> *** 2185,2201 ****
>>> vect_slp_analyze_node_operations (slp_tree node)
>>> {
>>> bool dummy;
>>> ! int i;
>>> gimple *stmt;
>>> slp_tree child;
>>> ! if (!node)
>>> return true;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> if (!vect_slp_analyze_node_operations (child))
>>> return false;
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> {
>>> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>>> --- 2225,2248 ----
>>> vect_slp_analyze_node_operations (slp_tree node)
>>> {
>>> bool dummy;
>>> ! int i, j;
>>> gimple *stmt;
>>> slp_tree child;
>>> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>> return true;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> if (!vect_slp_analyze_node_operations (child))
>>> return false;
>>> + /* Push SLP node def-type to stmts. */
>>> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
>>> +
>>> + bool res = true;
>>> FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> {
>>> stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>>> *************** vect_slp_analyze_node_operations (slp_tr
>>> *** 2203,2212 ****
>>> gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>>> if (!vect_analyze_stmt (stmt, &dummy, node))
>>> ! return false;
>>> }
>>> ! return true;
>>> }
>>> --- 2250,2268 ----
>>> gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>>> if (!vect_analyze_stmt (stmt, &dummy, node))
>>> ! {
>>> ! res = false;
>>> ! break;
>>> ! }
>>> }
>>> ! /* Restore stmt def-types. */
>>> ! FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> ! if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> ! FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> ! STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>>> !
>>> ! return res;
>>> }
>>> *************** vect_bb_slp_scalar_cost (basic_block bb,
>>> *** 2286,2292 ****
>>> if (!is_gimple_debug (use_stmt)
>>> && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>>> use_stmt)
>>> ! || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
>>> {
>>> (*life)[i] = true;
>>> BREAK_FROM_IMM_USE_STMT (use_iter);
>>> --- 2342,2348 ----
>>> if (!is_gimple_debug (use_stmt)
>>> && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>>> use_stmt)
>>> ! || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
>>> {
>>> (*life)[i] = true;
>>> BREAK_FROM_IMM_USE_STMT (use_iter);
>>> *************** vect_bb_slp_scalar_cost (basic_block bb,
>>> *** 2310,2316 ****
>>> }
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> ! if (child)
>>> scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>>> return scalar_cost;
>>> --- 2366,2372 ----
>>> }
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>>> scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>>> return scalar_cost;
>>> *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
>>> *** 2499,2513 ****
>>> return NULL;
>>> }
>>> - /* Mark all the statements that we do not want to vectorize. */
>>> - for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
>>> - gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
>>> - {
>>> - stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
>>> - if (STMT_SLP_TYPE (vinfo) != pure_slp)
>>> - STMT_VINFO_VECTORIZABLE (vinfo) = false;
>>> - }
>>> -
>>> if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
>>> BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
>>> {
>>> --- 2555,2560 ----
>>> *************** vect_get_slp_defs (vec<tree> ops, slp_tr
>>> *** 3085,3091 ****
>>> child = SLP_TREE_CHILDREN (slp_node)[child_index];
>>> /* We have to check both pattern and original def, if
>>> available. */
>>> ! if (child)
>>> {
>>> gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>>> gimple *related
>>> --- 3132,3138 ----
>>> child = SLP_TREE_CHILDREN (slp_node)[child_index];
>>> /* We have to check both pattern and original def, if
>>> available. */
>>> ! if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>>> {
>>> gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>>> gimple *related
>>> *************** vect_schedule_slp_instance (slp_tree nod
>>> *** 3374,3388 ****
>>> stmt_vec_info stmt_info;
>>> unsigned int vec_stmts_size, nunits, group_size;
>>> tree vectype;
>>> ! int i;
>>> slp_tree child;
>>> ! if (!node)
>>> return false;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> vect_schedule_slp_instance (child, instance, vectorization_factor);
>>> stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>>> stmt_info = vinfo_for_stmt (stmt);
>>> --- 3421,3441 ----
>>> stmt_vec_info stmt_info;
>>> unsigned int vec_stmts_size, nunits, group_size;
>>> tree vectype;
>>> ! int i, j;
>>> slp_tree child;
>>> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>> return false;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> vect_schedule_slp_instance (child, instance, vectorization_factor);
>>> + /* Push SLP node def-type to stmts. */
>>> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
>>> +
>>> stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>>> stmt_info = vinfo_for_stmt (stmt);
>>> *************** vect_schedule_slp_instance (slp_tree nod
>>> *** 3501,3506 ****
>>> --- 3554,3566 ----
>>> }
>>> }
>>> is_store = vect_transform_stmt (stmt, &si, &grouped_store, node,
>>> instance);
>>> +
>>> + /* Restore stmt def-types. */
>>> + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> + if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>>> +
>>> return is_store;
>>> }
>>> *************** vect_remove_slp_scalar_calls (slp_tree n
>>> *** 3519,3525 ****
>>> tree lhs;
>>> stmt_vec_info stmt_info;
>>> ! if (!node)
>>> return;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> --- 3579,3585 ----
>>> tree lhs;
>>> stmt_vec_info stmt_info;
>>> ! if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>> return;
>>> FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>
>>
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2015-12-18 9:09 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-14 15:14 [PATCH] Fix PR68852 Richard Biener
2015-12-14 15:34 ` Richard Biener
2015-12-17 16:31 ` Kyrill Tkachov
2015-12-18 8:57 ` Richard Biener
2015-12-18 9:09 ` Kyrill Tkachov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).