public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR68852
@ 2015-12-14 15:14 Richard Biener
  2015-12-14 15:34 ` Richard Biener
  2015-12-17 16:31 ` Kyrill Tkachov
  0 siblings, 2 replies; 5+ messages in thread
From: Richard Biener @ 2015-12-14 15:14 UTC (permalink / raw)
  To: gcc-patches


The following fixes PR68852 - so I finally needed to sit down and
fix the "build-from-scalars" hack in the SLP vectorizer by pretending
we'd have a sane vectorizer IL.  Basically I now mark the SLP node
with a proper vect_def_type but I have to push that down to the
stmt-info level whenever something would look at it.

It's a bit ugly but not too much yet ;)

Anyway, the proper fix is to have a sane data structure, nothing for
GCC 6 though.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

Verified SPEC CPU 2006 is happy with the patch.

Richard.

2015-12-14  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/68852
	* tree-vectorizer.h (struct _slp_tree): Add def_type member.
	(SLP_TREE_DEF_TYPE): New accessor.
	* tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
	hack.
	* tree-vect-slp.c (vect_create_new_slp_node): Initialize
	SLP_TREE_DEF_TYPE.
	(vect_build_slp_tree): When a node is to be built up from scalars
	do not push a NULL as child but instead set its def_type to
	vect_external_def.
	(vect_analyze_slp_cost_1): Check for child def-type instead
	of NULL.
	(vect_detect_hybrid_slp_stmts): Likewise.
	(vect_bb_slp_scalar_cost): Likewise.
	(vect_get_slp_defs): Likewise.
	(vect_slp_analyze_node_operations): Likewise.  Before
	processing node push the children def-types to the underlying
	stmts vinfo and restore it afterwards.
	(vect_schedule_slp_instance): Likewise.
	(vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
	as not vectorizable.

	* g++.dg/torture/pr68852.C: New testcase.

Index: gcc/tree-vectorizer.h
===================================================================
*** gcc/tree-vectorizer.h	(revision 231552)
--- gcc/tree-vectorizer.h	(working copy)
*************** struct _slp_tree {
*** 107,112 ****
--- 107,114 ----
    unsigned int vec_stmts_size;
    /* Whether the scalar computations use two different operators.  */
    bool two_operators;
+   /* The DEF type of this node.  */
+   enum vect_def_type def_type;
  };
  
  
*************** typedef struct _slp_instance {
*** 139,144 ****
--- 141,147 ----
  #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
  #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
  #define SLP_TREE_TWO_OPERATORS(S)		 (S)->two_operators
+ #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
  
  
  
Index: gcc/tree-vect-stmts.c
===================================================================
*** gcc/tree-vect-stmts.c	(revision 231552)
--- gcc/tree-vect-stmts.c	(working copy)
*************** vect_is_simple_use (tree operand, vec_in
*** 8649,8658 ****
    else
      {
        stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
!       if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
! 	*dt = vect_external_def;
!       else
! 	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      }
  
    if (dump_enabled_p ())
--- 8652,8658 ----
    else
      {
        stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
!       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
      }
  
    if (dump_enabled_p ())
Index: gcc/testsuite/g++.dg/torture/pr68852.C
===================================================================
--- gcc/testsuite/g++.dg/torture/pr68852.C	(revision 0)
+++ gcc/testsuite/g++.dg/torture/pr68852.C	(working copy)
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+
+struct A {
+    double x, y, z, w;
+    A() {}
+    A(double, double p2, double p3, double) : y(p2), z(p3) {}
+    void m_fn1();
+};
+
+struct B {
+    double x, y;
+};
+struct D : A {
+    D() {}
+    D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
+};
+
+class C {
+public:
+    float _11, _12, _13, _14;
+    float _21, _22, _23, _24;
+    float _31, _32, _33, _34;
+    float _41, _42, _43, _44;
+    D m_fn2(B p1) {
+	double z(p1.x + _43);
+	return *this * D(p1.x, p1.y, z, 1);
+    }
+    int ProjectRectBounds_next;
+    B __trans_tmp_3;
+    int m_fn3(int) {
+	B a, b;
+	D c[1];
+	b = __trans_tmp_3;
+	c[2] = m_fn2(b);
+	c[3] = m_fn2(a);
+	c[ProjectRectBounds_next].m_fn1();
+    }
+    D operator*(D p1) {
+	D d;
+	d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
+	d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
+	d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
+	d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
+	return d;
+    }
+};
+
+void fn1() {
+    C e;
+    int f = e.m_fn3(f);
+}
Index: gcc/tree-vect-slp.c
===================================================================
*** gcc/tree-vect-slp.c	(revision 231610)
--- gcc/tree-vect-slp.c	(working copy)
*************** vect_free_slp_tree (slp_tree node)
*** 51,59 ****
    int i;
    slp_tree child;
  
-   if (!node)
-     return;
- 
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      vect_free_slp_tree (child);
  
--- 51,56 ----
*************** vect_create_new_slp_node (vec<gimple *>
*** 103,108 ****
--- 100,106 ----
    SLP_TREE_CHILDREN (node).create (nops);
    SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
    SLP_TREE_TWO_OPERATORS (node) = false;
+   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  
    return node;
  }
*************** vect_build_slp_tree (vec_info *vinfo,
*** 938,944 ****
  	      slp_tree grandchild;
  
  	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! 		if (grandchild != NULL)
  		  break;
  	      if (!grandchild)
  		{
--- 936,942 ----
  	      slp_tree grandchild;
  
  	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
  		  break;
  	      if (!grandchild)
  		{
*************** vect_build_slp_tree (vec_info *vinfo,
*** 946,960 ****
  		  *max_nunits = old_max_nunits;
  		  loads->truncate (old_nloads);
  		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! 		      vect_free_slp_tree (grandchild);
  		  SLP_TREE_CHILDREN (child).truncate (0);
  
  		  dump_printf_loc (MSG_NOTE, vect_location,
  				   "Building parent vector operands from "
  				   "scalars instead\n");
  		  oprnd_info->def_stmts = vNULL;
! 		  vect_free_slp_tree (child);
! 		  SLP_TREE_CHILDREN (*node).quick_push (NULL);
  		  continue;
  		}
  	    }
--- 944,958 ----
  		  *max_nunits = old_max_nunits;
  		  loads->truncate (old_nloads);
  		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! 		    vect_free_slp_tree (grandchild);
  		  SLP_TREE_CHILDREN (child).truncate (0);
  
  		  dump_printf_loc (MSG_NOTE, vect_location,
  				   "Building parent vector operands from "
  				   "scalars instead\n");
  		  oprnd_info->def_stmts = vNULL;
! 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
! 		  SLP_TREE_CHILDREN (*node).quick_push (child);
  		  continue;
  		}
  	    }
*************** vect_build_slp_tree (vec_info *vinfo,
*** 992,999 ****
  	  dump_printf_loc (MSG_NOTE, vect_location,
  			   "Building vector operands from scalars\n");
  	  oprnd_info->def_stmts = vNULL;
! 	  vect_free_slp_tree (child);
! 	  SLP_TREE_CHILDREN (*node).quick_push (NULL);
  	  continue;
  	}
  
--- 990,997 ----
  	  dump_printf_loc (MSG_NOTE, vect_location,
  			   "Building vector operands from scalars\n");
  	  oprnd_info->def_stmts = vNULL;
! 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
! 	  SLP_TREE_CHILDREN (*node).quick_push (child);
  	  continue;
  	}
  
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1044,1049 ****
--- 1042,1061 ----
  				   tem, npermutes, &this_tree_size,
  				   max_tree_size))
  	    {
+ 	      /* ... so if successful we can apply the operand swapping
+ 		 to the GIMPLE IL.  This is necessary because for example
+ 		 vect_get_slp_defs uses operand indexes and thus expects
+ 		 canonical operand order.  This is also necessary even
+ 		 if we end up building the operand from scalars as
+ 		 we'll continue to process swapped operand two.  */
+ 	      for (j = 0; j < group_size; ++j)
+ 		if (!matches[j])
+ 		  {
+ 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
+ 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
+ 				       gimple_assign_rhs2_ptr (stmt));
+ 		  }
+ 
  	      /* If we have all children of child built up from scalars then
  		 just throw that away and build it up this node from scalars.  */
  	      if (!SLP_TREE_CHILDREN (child).is_empty ())
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1052,1058 ****
  		  slp_tree grandchild;
  
  		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! 		    if (grandchild != NULL)
  		      break;
  		  if (!grandchild)
  		    {
--- 1064,1070 ----
  		  slp_tree grandchild;
  
  		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
! 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
  		      break;
  		  if (!grandchild)
  		    {
*************** vect_build_slp_tree (vec_info *vinfo,
*** 1067,1089 ****
  				       "Building parent vector operands from "
  				       "scalars instead\n");
  		      oprnd_info->def_stmts = vNULL;
! 		      vect_free_slp_tree (child);
! 		      SLP_TREE_CHILDREN (*node).quick_push (NULL);
  		      continue;
  		    }
  		}
  
- 	      /* ... so if successful we can apply the operand swapping
- 		 to the GIMPLE IL.  This is necessary because for example
- 		 vect_get_slp_defs uses operand indexes and thus expects
- 		 canonical operand order.  */
- 	      for (j = 0; j < group_size; ++j)
- 		if (!matches[j])
- 		  {
- 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
- 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
- 				       gimple_assign_rhs2_ptr (stmt));
- 		  }
  	      oprnd_info->def_stmts = vNULL;
  	      SLP_TREE_CHILDREN (*node).quick_push (child);
  	      continue;
--- 1079,1090 ----
  				       "Building parent vector operands from "
  				       "scalars instead\n");
  		      oprnd_info->def_stmts = vNULL;
! 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
! 		      SLP_TREE_CHILDREN (*node).quick_push (child);
  		      continue;
  		    }
  		}
  
  	      oprnd_info->def_stmts = vNULL;
  	      SLP_TREE_CHILDREN (*node).quick_push (child);
  	      continue;
*************** vect_print_slp_tree (int dump_kind, loca
*** 1114,1123 ****
    gimple *stmt;
    slp_tree child;
  
!   if (!node)
!     return;
! 
!   dump_printf_loc (dump_kind, loc, "node\n");
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
      {
        dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
--- 1115,1123 ----
    gimple *stmt;
    slp_tree child;
  
!   dump_printf_loc (dump_kind, loc, "node%s\n",
! 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
! 		   ? " (external)" : "");
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
      {
        dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
*************** vect_mark_slp_stmts (slp_tree node, enum
*** 1140,1146 ****
    gimple *stmt;
    slp_tree child;
  
!   if (!node)
      return;
  
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
--- 1140,1146 ----
    gimple *stmt;
    slp_tree child;
  
!   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
      return;
  
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
*************** vect_mark_slp_stmts_relevant (slp_tree n
*** 1162,1168 ****
    stmt_vec_info stmt_info;
    slp_tree child;
  
!   if (!node)
      return;
  
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
--- 1162,1168 ----
    stmt_vec_info stmt_info;
    slp_tree child;
  
!   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
      return;
  
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1400,1406 ****
  			 stmt_vector_for_cost *body_cost_vec,
  			 unsigned ncopies_for_cost)
  {
!   unsigned i;
    slp_tree child;
    gimple *stmt, *s;
    stmt_vec_info stmt_info;
--- 1400,1406 ----
  			 stmt_vector_for_cost *body_cost_vec,
  			 unsigned ncopies_for_cost)
  {
!   unsigned i, j;
    slp_tree child;
    gimple *stmt, *s;
    stmt_vec_info stmt_info;
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1409,1415 ****
  
    /* Recurse down the SLP tree.  */
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (child)
        vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
  			       body_cost_vec, ncopies_for_cost);
  
--- 1409,1415 ----
  
    /* Recurse down the SLP tree.  */
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
  			       body_cost_vec, ncopies_for_cost);
  
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1464,1472 ****
--- 1464,1479 ----
  	}
      }
  
+   /* Push SLP node def-type to stmts.  */
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
+ 
    /* Scan operands and account for prologue cost of constants/externals.
       ???  This over-estimates cost for multiple uses and should be
       re-engineered.  */
+   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
    lhs = gimple_get_lhs (stmt);
    for (i = 0; i < gimple_num_ops (stmt); ++i)
      {
*************** vect_analyze_slp_cost_1 (slp_instance in
*** 1489,1494 ****
--- 1496,1507 ----
  			      stmt_info, 0, vect_prologue);
  	}
      }
+ 
+   /* Restore stmt def-types.  */
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
  }
  
  /* Compute the cost for the SLP instance INSTANCE.  */
*************** vect_analyze_slp_instance (vec_info *vin
*** 1795,1800 ****
--- 1808,1840 ----
              }
          }
  
+       /* If the loads and stores can be handled with load/store-lane
+          instructions do not generate this SLP instance.  */
+       if (is_a <loop_vec_info> (vinfo)
+ 	  && loads_permuted
+ 	  && dr && vect_store_lanes_supported (vectype, group_size))
+ 	{
+ 	  slp_tree load_node;
+ 	  FOR_EACH_VEC_ELT (loads, i, load_node)
+ 	    {
+ 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
+ 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
+ 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
+ 	      if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
+ 					       GROUP_SIZE (stmt_vinfo)))
+ 		break;
+ 	    }
+ 	  if (i == loads.length ())
+ 	    {
+ 	      if (dump_enabled_p ())
+ 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ 				 "Built SLP cancelled: can use "
+ 				 "load/store-lanes\n");
+               vect_free_slp_instance (new_instance);
+               return false;
+ 	    }
+ 	}
+ 
        vinfo->slp_instances.safe_push (new_instance);
  
        if (dump_enabled_p ())
*************** vect_detect_hybrid_slp_stmts (slp_tree n
*** 2004,2010 ****
      }
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
!     if (child)
        vect_detect_hybrid_slp_stmts (child, i, stype);
  }
  
--- 2044,2050 ----
      }
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
!     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
        vect_detect_hybrid_slp_stmts (child, i, stype);
  }
  
*************** static bool
*** 2185,2201 ****
  vect_slp_analyze_node_operations (slp_tree node)
  {
    bool dummy;
!   int i;
    gimple *stmt;
    slp_tree child;
  
!   if (!node)
      return true;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      if (!vect_slp_analyze_node_operations (child))
        return false;
  
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
      {
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
--- 2225,2248 ----
  vect_slp_analyze_node_operations (slp_tree node)
  {
    bool dummy;
!   int i, j;
    gimple *stmt;
    slp_tree child;
  
!   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
      return true;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      if (!vect_slp_analyze_node_operations (child))
        return false;
  
+   /* Push SLP node def-type to stmts.  */
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
+ 
+   bool res = true;
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
      {
        stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
*************** vect_slp_analyze_node_operations (slp_tr
*** 2203,2212 ****
        gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
  
        if (!vect_analyze_stmt (stmt, &dummy, node))
! 	return false;
      }
  
!   return true;
  }
  
  
--- 2250,2268 ----
        gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
  
        if (!vect_analyze_stmt (stmt, &dummy, node))
! 	{
! 	  res = false;
! 	  break;
! 	}
      }
  
!   /* Restore stmt def-types.  */
!   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
!       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
! 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
! 
!   return res;
  }
  
  
*************** vect_bb_slp_scalar_cost (basic_block bb,
*** 2286,2292 ****
  	    if (!is_gimple_debug (use_stmt)
  		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
  					     use_stmt)
! 		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
  	      {
  		(*life)[i] = true;
  		BREAK_FROM_IMM_USE_STMT (use_iter);
--- 2342,2348 ----
  	    if (!is_gimple_debug (use_stmt)
  		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
  					     use_stmt)
! 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
  	      {
  		(*life)[i] = true;
  		BREAK_FROM_IMM_USE_STMT (use_iter);
*************** vect_bb_slp_scalar_cost (basic_block bb,
*** 2310,2316 ****
      }
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (child)
        scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
  
    return scalar_cost;
--- 2366,2372 ----
      }
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
!     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
  
    return scalar_cost;
*************** vect_slp_analyze_bb_1 (gimple_stmt_itera
*** 2499,2513 ****
        return NULL;
      }
  
-   /* Mark all the statements that we do not want to vectorize.  */
-   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
-        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
-     {
-       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
-       if (STMT_SLP_TYPE (vinfo) != pure_slp)
- 	STMT_VINFO_VECTORIZABLE (vinfo) = false;
-     }
- 
    if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
  				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
      {
--- 2555,2560 ----
*************** vect_get_slp_defs (vec<tree> ops, slp_tr
*** 3085,3091 ****
            child = SLP_TREE_CHILDREN (slp_node)[child_index];
  
  	  /* We have to check both pattern and original def, if available.  */
! 	  if (child)
  	    {
  	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
  	      gimple *related
--- 3132,3138 ----
            child = SLP_TREE_CHILDREN (slp_node)[child_index];
  
  	  /* We have to check both pattern and original def, if available.  */
! 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
  	    {
  	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
  	      gimple *related
*************** vect_schedule_slp_instance (slp_tree nod
*** 3374,3388 ****
    stmt_vec_info stmt_info;
    unsigned int vec_stmts_size, nunits, group_size;
    tree vectype;
!   int i;
    slp_tree child;
  
!   if (!node)
      return false;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      vect_schedule_slp_instance (child, instance, vectorization_factor);
  
    stmt = SLP_TREE_SCALAR_STMTS (node)[0];
    stmt_info = vinfo_for_stmt (stmt);
  
--- 3421,3441 ----
    stmt_vec_info stmt_info;
    unsigned int vec_stmts_size, nunits, group_size;
    tree vectype;
!   int i, j;
    slp_tree child;
  
!   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
      return false;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
      vect_schedule_slp_instance (child, instance, vectorization_factor);
  
+   /* Push SLP node def-type to stmts.  */
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
+ 
    stmt = SLP_TREE_SCALAR_STMTS (node)[0];
    stmt_info = vinfo_for_stmt (stmt);
  
*************** vect_schedule_slp_instance (slp_tree nod
*** 3501,3506 ****
--- 3554,3566 ----
  	}
      }
    is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
+ 
+   /* Restore stmt def-types.  */
+   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
+       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
+ 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
+ 
    return is_store;
  }
  
*************** vect_remove_slp_scalar_calls (slp_tree n
*** 3519,3525 ****
    tree lhs;
    stmt_vec_info stmt_info;
  
!   if (!node)
      return;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
--- 3579,3585 ----
    tree lhs;
    stmt_vec_info stmt_info;
  
!   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
      return;
  
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Fix PR68852
  2015-12-14 15:14 [PATCH] Fix PR68852 Richard Biener
@ 2015-12-14 15:34 ` Richard Biener
  2015-12-17 16:31 ` Kyrill Tkachov
  1 sibling, 0 replies; 5+ messages in thread
From: Richard Biener @ 2015-12-14 15:34 UTC (permalink / raw)
  To: gcc-patches

On Mon, 14 Dec 2015, Richard Biener wrote:

> 
> The following fixes PR68852 - so I finally needed to sit down and
> fix the "build-from-scalars" hack in the SLP vectorizer by pretending
> we'd have a sane vectorizer IL.  Basically I now mark the SLP node
> with a proper vect_def_type but I have to push that down to the
> stmt-info level whenever sth would look at it.
> 
> It's a bit ugly but not too much yet ;)
> 
> Anyway, the proper fix is to have a sane data structure, nothing for
> GCC 6 though.
> 
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> 
> Verified SPEC CPU 2006 is happy with the patch.

Ick.  I reverted the accidentally applied fix for PR68707 that went
with this patch.  The other unrelated hunk was already applied
as fix for PR68775.

Richard.

> Richard.
> 
> 2015-12-14  Richard Biener  <rguenther@suse.de>
> 
> 	PR tree-optimization/68852
> 	* tree-vectorizer.h (struct _slp_tree): Add def_type member.
> 	(SLP_TREE_DEF_TYPE): New accessor.
> 	* tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
> 	hack.
> 	* tree-vect-slp.c (vect_create_new_slp_node): Initialize
> 	SLP_TREE_DEF_TYPE.
> 	(vect_build_slp_tree): When a node is to be built up from scalars
> 	do not push a NULL as child but instead set its def_type to
> 	vect_external_def.
> 	(vect_analyze_slp_cost_1): Check for child def-type instead
> 	of NULL.
> 	(vect_detect_hybrid_slp_stmts): Likewise.
> 	(vect_bb_slp_scalar_cost): Likewise.
> 	(vect_get_slp_defs): Likewise.
> 	(vect_slp_analyze_node_operations): Likewise.  Before
> 	processing node push the children def-types to the underlying
> 	stmts vinfo and restore it afterwards.
> 	(vect_schedule_slp_instance): Likewise.
> 	(vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
> 	as not vectorizable.
> 
> 	* g++.dg/torture/pr68852.C: New testcase.
> 
> Index: gcc/tree-vectorizer.h
> ===================================================================
> *** gcc/tree-vectorizer.h	(revision 231552)
> --- gcc/tree-vectorizer.h	(working copy)
> *************** struct _slp_tree {
> *** 107,112 ****
> --- 107,114 ----
>     unsigned int vec_stmts_size;
>     /* Whether the scalar computations use two different operators.  */
>     bool two_operators;
> +   /* The DEF type of this node.  */
> +   enum vect_def_type def_type;
>   };
>   
>   
> *************** typedef struct _slp_instance {
> *** 139,144 ****
> --- 141,147 ----
>   #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
>   #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
>   #define SLP_TREE_TWO_OPERATORS(S)		 (S)->two_operators
> + #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
>   
>   
>   
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c	(revision 231552)
> --- gcc/tree-vect-stmts.c	(working copy)
> *************** vect_is_simple_use (tree operand, vec_in
> *** 8649,8658 ****
>     else
>       {
>         stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> !       if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
> ! 	*dt = vect_external_def;
> !       else
> ! 	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>       }
>   
>     if (dump_enabled_p ())
> --- 8652,8658 ----
>     else
>       {
>         stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> !       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>       }
>   
>     if (dump_enabled_p ())
> Index: gcc/testsuite/g++.dg/torture/pr68852.C
> ===================================================================
> --- gcc/testsuite/g++.dg/torture/pr68852.C	(revision 0)
> +++ gcc/testsuite/g++.dg/torture/pr68852.C	(working copy)
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +
> +struct A {
> +    double x, y, z, w;
> +    A() {}
> +    A(double, double p2, double p3, double) : y(p2), z(p3) {}
> +    void m_fn1();
> +};
> +
> +struct B {
> +    double x, y;
> +};
> +struct D : A {
> +    D() {}
> +    D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
> +};
> +
> +class C {
> +public:
> +    float _11, _12, _13, _14;
> +    float _21, _22, _23, _24;
> +    float _31, _32, _33, _34;
> +    float _41, _42, _43, _44;
> +    D m_fn2(B p1) {
> +	double z(p1.x + _43);
> +	return *this * D(p1.x, p1.y, z, 1);
> +    }
> +    int ProjectRectBounds_next;
> +    B __trans_tmp_3;
> +    int m_fn3(int) {
> +	B a, b;
> +	D c[1];
> +	b = __trans_tmp_3;
> +	c[2] = m_fn2(b);
> +	c[3] = m_fn2(a);
> +	c[ProjectRectBounds_next].m_fn1();
> +    }
> +    D operator*(D p1) {
> +	D d;
> +	d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
> +	d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
> +	d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
> +	d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
> +	return d;
> +    }
> +};
> +
> +void fn1() {
> +    C e;
> +    int f = e.m_fn3(f);
> +}
> Index: gcc/tree-vect-slp.c
> ===================================================================
> *** gcc/tree-vect-slp.c	(revision 231610)
> --- gcc/tree-vect-slp.c	(working copy)
> *************** vect_free_slp_tree (slp_tree node)
> *** 51,59 ****
>     int i;
>     slp_tree child;
>   
> -   if (!node)
> -     return;
> - 
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>       vect_free_slp_tree (child);
>   
> --- 51,56 ----
> *************** vect_create_new_slp_node (vec<gimple *>
> *** 103,108 ****
> --- 100,106 ----
>     SLP_TREE_CHILDREN (node).create (nops);
>     SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
>     SLP_TREE_TWO_OPERATORS (node) = false;
> +   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
>   
>     return node;
>   }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 938,944 ****
>   	      slp_tree grandchild;
>   
>   	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		if (grandchild != NULL)
>   		  break;
>   	      if (!grandchild)
>   		{
> --- 936,942 ----
>   	      slp_tree grandchild;
>   
>   	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>   		  break;
>   	      if (!grandchild)
>   		{
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 946,960 ****
>   		  *max_nunits = old_max_nunits;
>   		  loads->truncate (old_nloads);
>   		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		      vect_free_slp_tree (grandchild);
>   		  SLP_TREE_CHILDREN (child).truncate (0);
>   
>   		  dump_printf_loc (MSG_NOTE, vect_location,
>   				   "Building parent vector operands from "
>   				   "scalars instead\n");
>   		  oprnd_info->def_stmts = vNULL;
> ! 		  vect_free_slp_tree (child);
> ! 		  SLP_TREE_CHILDREN (*node).quick_push (NULL);
>   		  continue;
>   		}
>   	    }
> --- 944,958 ----
>   		  *max_nunits = old_max_nunits;
>   		  loads->truncate (old_nloads);
>   		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		    vect_free_slp_tree (grandchild);
>   		  SLP_TREE_CHILDREN (child).truncate (0);
>   
>   		  dump_printf_loc (MSG_NOTE, vect_location,
>   				   "Building parent vector operands from "
>   				   "scalars instead\n");
>   		  oprnd_info->def_stmts = vNULL;
> ! 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! 		  SLP_TREE_CHILDREN (*node).quick_push (child);
>   		  continue;
>   		}
>   	    }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 992,999 ****
>   	  dump_printf_loc (MSG_NOTE, vect_location,
>   			   "Building vector operands from scalars\n");
>   	  oprnd_info->def_stmts = vNULL;
> ! 	  vect_free_slp_tree (child);
> ! 	  SLP_TREE_CHILDREN (*node).quick_push (NULL);
>   	  continue;
>   	}
>   
> --- 990,997 ----
>   	  dump_printf_loc (MSG_NOTE, vect_location,
>   			   "Building vector operands from scalars\n");
>   	  oprnd_info->def_stmts = vNULL;
> ! 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! 	  SLP_TREE_CHILDREN (*node).quick_push (child);
>   	  continue;
>   	}
>   
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1044,1049 ****
> --- 1042,1061 ----
>   				   tem, npermutes, &this_tree_size,
>   				   max_tree_size))
>   	    {
> + 	      /* ... so if successful we can apply the operand swapping
> + 		 to the GIMPLE IL.  This is necessary because for example
> + 		 vect_get_slp_defs uses operand indexes and thus expects
> + 		 canonical operand order.  This is also necessary even
> + 		 if we end up building the operand from scalars as
> + 		 we'll continue to process swapped operand two.  */
> + 	      for (j = 0; j < group_size; ++j)
> + 		if (!matches[j])
> + 		  {
> + 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> + 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> + 				       gimple_assign_rhs2_ptr (stmt));
> + 		  }
> + 
>   	      /* If we have all children of child built up from scalars then
>   		 just throw that away and build it up this node from scalars.  */
>   	      if (!SLP_TREE_CHILDREN (child).is_empty ())
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1052,1058 ****
>   		  slp_tree grandchild;
>   
>   		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		    if (grandchild != NULL)
>   		      break;
>   		  if (!grandchild)
>   		    {
> --- 1064,1070 ----
>   		  slp_tree grandchild;
>   
>   		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>   		      break;
>   		  if (!grandchild)
>   		    {
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1067,1089 ****
>   				       "Building parent vector operands from "
>   				       "scalars instead\n");
>   		      oprnd_info->def_stmts = vNULL;
> ! 		      vect_free_slp_tree (child);
> ! 		      SLP_TREE_CHILDREN (*node).quick_push (NULL);
>   		      continue;
>   		    }
>   		}
>   
> - 	      /* ... so if successful we can apply the operand swapping
> - 		 to the GIMPLE IL.  This is necessary because for example
> - 		 vect_get_slp_defs uses operand indexes and thus expects
> - 		 canonical operand order.  */
> - 	      for (j = 0; j < group_size; ++j)
> - 		if (!matches[j])
> - 		  {
> - 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> - 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> - 				       gimple_assign_rhs2_ptr (stmt));
> - 		  }
>   	      oprnd_info->def_stmts = vNULL;
>   	      SLP_TREE_CHILDREN (*node).quick_push (child);
>   	      continue;
> --- 1079,1090 ----
>   				       "Building parent vector operands from "
>   				       "scalars instead\n");
>   		      oprnd_info->def_stmts = vNULL;
> ! 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! 		      SLP_TREE_CHILDREN (*node).quick_push (child);
>   		      continue;
>   		    }
>   		}
>   
>   	      oprnd_info->def_stmts = vNULL;
>   	      SLP_TREE_CHILDREN (*node).quick_push (child);
>   	      continue;
> *************** vect_print_slp_tree (int dump_kind, loca
> *** 1114,1123 ****
>     gimple *stmt;
>     slp_tree child;
>   
> !   if (!node)
> !     return;
> ! 
> !   dump_printf_loc (dump_kind, loc, "node\n");
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>       {
>         dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> --- 1115,1123 ----
>     gimple *stmt;
>     slp_tree child;
>   
> !   dump_printf_loc (dump_kind, loc, "node%s\n",
> ! 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
> ! 		   ? " (external)" : "");
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>       {
>         dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> *************** vect_mark_slp_stmts (slp_tree node, enum
> *** 1140,1146 ****
>     gimple *stmt;
>     slp_tree child;
>   
> !   if (!node)
>       return;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1140,1146 ----
>     gimple *stmt;
>     slp_tree child;
>   
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>       return;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_mark_slp_stmts_relevant (slp_tree n
> *** 1162,1168 ****
>     stmt_vec_info stmt_info;
>     slp_tree child;
>   
> !   if (!node)
>       return;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1162,1168 ----
>     stmt_vec_info stmt_info;
>     slp_tree child;
>   
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>       return;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1400,1406 ****
>   			 stmt_vector_for_cost *body_cost_vec,
>   			 unsigned ncopies_for_cost)
>   {
> !   unsigned i;
>     slp_tree child;
>     gimple *stmt, *s;
>     stmt_vec_info stmt_info;
> --- 1400,1406 ----
>   			 stmt_vector_for_cost *body_cost_vec,
>   			 unsigned ncopies_for_cost)
>   {
> !   unsigned i, j;
>     slp_tree child;
>     gimple *stmt, *s;
>     stmt_vec_info stmt_info;
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1409,1415 ****
>   
>     /* Recurse down the SLP tree.  */
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (child)
>         vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>   			       body_cost_vec, ncopies_for_cost);
>   
> --- 1409,1415 ----
>   
>     /* Recurse down the SLP tree.  */
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>         vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>   			       body_cost_vec, ncopies_for_cost);
>   
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1464,1472 ****
> --- 1464,1479 ----
>   	}
>       }
>   
> +   /* Push SLP node def-type to stmts.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> + 
>     /* Scan operands and account for prologue cost of constants/externals.
>        ???  This over-estimates cost for multiple uses and should be
>        re-engineered.  */
> +   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>     lhs = gimple_get_lhs (stmt);
>     for (i = 0; i < gimple_num_ops (stmt); ++i)
>       {
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1489,1494 ****
> --- 1496,1507 ----
>   			      stmt_info, 0, vect_prologue);
>   	}
>       }
> + 
> +   /* Restore stmt def-types.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>   }
>   
>   /* Compute the cost for the SLP instance INSTANCE.  */
> *************** vect_analyze_slp_instance (vec_info *vin
> *** 1795,1800 ****
> --- 1808,1840 ----
>               }
>           }
>   
> +       /* If the loads and stores can be handled with load/store-lane
> +          instructions do not generate this SLP instance.  */
> +       if (is_a <loop_vec_info> (vinfo)
> + 	  && loads_permuted
> + 	  && dr && vect_store_lanes_supported (vectype, group_size))
> + 	{
> + 	  slp_tree load_node;
> + 	  FOR_EACH_VEC_ELT (loads, i, load_node)
> + 	    {
> + 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
> + 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> + 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> + 	      if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
> + 					       GROUP_SIZE (stmt_vinfo)))
> + 		break;
> + 	    }
> + 	  if (i == loads.length ())
> + 	    {
> + 	      if (dump_enabled_p ())
> + 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + 				 "Built SLP cancelled: can use "
> + 				 "load/store-lanes\n");
> +               vect_free_slp_instance (new_instance);
> +               return false;
> + 	    }
> + 	}
> + 
>         vinfo->slp_instances.safe_push (new_instance);
>   
>         if (dump_enabled_p ())
> *************** vect_detect_hybrid_slp_stmts (slp_tree n
> *** 2004,2010 ****
>       }
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> !     if (child)
>         vect_detect_hybrid_slp_stmts (child, i, stype);
>   }
>   
> --- 2044,2050 ----
>       }
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> !     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
>         vect_detect_hybrid_slp_stmts (child, i, stype);
>   }
>   
> *************** static bool
> *** 2185,2201 ****
>   vect_slp_analyze_node_operations (slp_tree node)
>   {
>     bool dummy;
> !   int i;
>     gimple *stmt;
>     slp_tree child;
>   
> !   if (!node)
>       return true;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>       if (!vect_slp_analyze_node_operations (child))
>         return false;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>       {
>         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> --- 2225,2248 ----
>   vect_slp_analyze_node_operations (slp_tree node)
>   {
>     bool dummy;
> !   int i, j;
>     gimple *stmt;
>     slp_tree child;
>   
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>       return true;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>       if (!vect_slp_analyze_node_operations (child))
>         return false;
>   
> +   /* Push SLP node def-type to stmts.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> + 
> +   bool res = true;
>     FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>       {
>         stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> *************** vect_slp_analyze_node_operations (slp_tr
> *** 2203,2212 ****
>         gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>   
>         if (!vect_analyze_stmt (stmt, &dummy, node))
> ! 	return false;
>       }
>   
> !   return true;
>   }
>   
>   
> --- 2250,2268 ----
>         gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>   
>         if (!vect_analyze_stmt (stmt, &dummy, node))
> ! 	{
> ! 	  res = false;
> ! 	  break;
> ! 	}
>       }
>   
> !   /* Restore stmt def-types.  */
> !   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> !       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> ! 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> ! 
> !   return res;
>   }
>   
>   
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2286,2292 ****
>   	    if (!is_gimple_debug (use_stmt)
>   		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>   					     use_stmt)
> ! 		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
>   	      {
>   		(*life)[i] = true;
>   		BREAK_FROM_IMM_USE_STMT (use_iter);
> --- 2342,2348 ----
>   	    if (!is_gimple_debug (use_stmt)
>   		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>   					     use_stmt)
> ! 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
>   	      {
>   		(*life)[i] = true;
>   		BREAK_FROM_IMM_USE_STMT (use_iter);
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2310,2316 ****
>       }
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (child)
>         scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>   
>     return scalar_cost;
> --- 2366,2372 ----
>       }
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>         scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>   
>     return scalar_cost;
> *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
> *** 2499,2513 ****
>         return NULL;
>       }
>   
> -   /* Mark all the statements that we do not want to vectorize.  */
> -   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
> -        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
> -     {
> -       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
> -       if (STMT_SLP_TYPE (vinfo) != pure_slp)
> - 	STMT_VINFO_VECTORIZABLE (vinfo) = false;
> -     }
> - 
>     if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
>   				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
>       {
> --- 2555,2560 ----
> *************** vect_get_slp_defs (vec<tree> ops, slp_tr
> *** 3085,3091 ****
>             child = SLP_TREE_CHILDREN (slp_node)[child_index];
>   
>   	  /* We have to check both pattern and original def, if available.  */
> ! 	  if (child)
>   	    {
>   	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>   	      gimple *related
> --- 3132,3138 ----
>             child = SLP_TREE_CHILDREN (slp_node)[child_index];
>   
>   	  /* We have to check both pattern and original def, if available.  */
> ! 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>   	    {
>   	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>   	      gimple *related
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3374,3388 ****
>     stmt_vec_info stmt_info;
>     unsigned int vec_stmts_size, nunits, group_size;
>     tree vectype;
> !   int i;
>     slp_tree child;
>   
> !   if (!node)
>       return false;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>       vect_schedule_slp_instance (child, instance, vectorization_factor);
>   
>     stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>     stmt_info = vinfo_for_stmt (stmt);
>   
> --- 3421,3441 ----
>     stmt_vec_info stmt_info;
>     unsigned int vec_stmts_size, nunits, group_size;
>     tree vectype;
> !   int i, j;
>     slp_tree child;
>   
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>       return false;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>       vect_schedule_slp_instance (child, instance, vectorization_factor);
>   
> +   /* Push SLP node def-type to stmts.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> + 
>     stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>     stmt_info = vinfo_for_stmt (stmt);
>   
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3501,3506 ****
> --- 3554,3566 ----
>   	}
>       }
>     is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
> + 
> +   /* Restore stmt def-types.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> + 
>     return is_store;
>   }
>   
> *************** vect_remove_slp_scalar_calls (slp_tree n
> *** 3519,3525 ****
>     tree lhs;
>     stmt_vec_info stmt_info;
>   
> !   if (!node)
>       return;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> --- 3579,3585 ----
>     tree lhs;
>     stmt_vec_info stmt_info;
>   
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>       return;
>   
>     FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Fix PR68852
  2015-12-14 15:14 [PATCH] Fix PR68852 Richard Biener
  2015-12-14 15:34 ` Richard Biener
@ 2015-12-17 16:31 ` Kyrill Tkachov
  2015-12-18  8:57   ` Richard Biener
  1 sibling, 1 reply; 5+ messages in thread
From: Kyrill Tkachov @ 2015-12-17 16:31 UTC (permalink / raw)
  To: Richard Biener, gcc-patches


On 14/12/15 15:14, Richard Biener wrote:
> The following fixes PR68852 - so I finally needed to sit down and
> fix the "build-from-scalars" hack in the SLP vectorizer by pretending
> we'd have a sane vectorizer IL.  Basically I now mark the SLP node
> with a proper vect_def_type but I have to push that down to the
> stmt-info level whenever sth would look at it.
>
> It's a bit ugly but not too much yet ;)
>
> Anyway, the proper fix is to have a sane data structure, nothing for
> GCC 6 though.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Verified SPEC CPU 2006 is happy with the patch.

Unfortunately it's not very happy on aarch64 ;)
416.gamess, and trans.fppized.f in particular, ICEs after this patch with:

trans.fppized.f:2086:0:

        SUBROUTINE TRFMCX(NPRINT,ICORBS,IORBS,IORB,DOFOCK,DOEXCH,


internal compiler error: in vect_analyze_stmt, at tree-vect-stmts.c:8013
0xd34d1b vect_analyze_stmt(gimple*, bool*, _slp_tree*)
         $SRC/tree-vect-stmts.c:8013
0xd4b64a vect_slp_analyze_node_operations
         $SRC/tree-vect-slp.c:2237
0xd4b533 vect_slp_analyze_node_operations
         $SRC/tree-vect-slp.c:2221
0xd4b533 vect_slp_analyze_node_operations
         $SRC/tree-vect-slp.c:2221
0xd4b533 vect_slp_analyze_node_operations
         $SRC/tree-vect-slp.c:2221
0xd4b533 vect_slp_analyze_node_operations
         $SRC/tree-vect-slp.c:2221
0xd4f7dc vect_slp_analyze_operations(vec<_slp_instance*, va_heap, vl_ptr>, void*)
         $SRC/tree-vect-slp.c:2269
0xd546a0 vect_slp_analyze_bb_1
         $SRC/tree-vect-slp.c:2543
0xd546a0 vect_slp_bb(basic_block_def*)
         $SRC/tree-vect-slp.c:2630
0xd56985 execute
         $SRC/tree-vectorizer.c:759
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <http://gcc.gnu.org/bugs.html> for instructions.

when using the flags
-mcpu=cortex-a53+crypto -save-temps -Ofast -fomit-frame-pointer -fno-aggressive-loop-optimizations

I'll open a bug report to keep track of it.

Thanks,
Kyrill

> Richard.
>
> 2015-12-14  Richard Biener  <rguenther@suse.de>
>
> 	PR tree-optimization/68852
> 	* tree-vectorizer.h (struct _slp_tree): Add def_type member.
> 	(SLP_TREE_DEF_TYPE): New accessor.
> 	* tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
> 	hack.
> 	* tree-vect-slp.c (vect_create_new_slp_node): Initialize
> 	SLP_TREE_DEF_TYPE.
> 	(vect_build_slp_tree): When a node is to be built up from scalars
> 	do not push a NULL as child but instead set its def_type to
> 	vect_external_def.
> 	(vect_analyze_slp_cost_1): Check for child def-type instead
> 	of NULL.
> 	(vect_detect_hybrid_slp_stmts): Likewise.
> 	(vect_bb_slp_scalar_cost): Likewise.
> 	(vect_get_slp_defs): Likewise.
> 	(vect_slp_analyze_node_operations): Likewise.  Before
> 	processing node push the children def-types to the underlying
> 	stmts vinfo and restore it afterwards.
> 	(vect_schedule_slp_instance): Likewise.
> 	(vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
> 	as not vectorizable.
>
> 	* g++.dg/torture/pr68852.C: New testcase.
>
> Index: gcc/tree-vectorizer.h
> ===================================================================
> *** gcc/tree-vectorizer.h	(revision 231552)
> --- gcc/tree-vectorizer.h	(working copy)
> *************** struct _slp_tree {
> *** 107,112 ****
> --- 107,114 ----
>      unsigned int vec_stmts_size;
>      /* Whether the scalar computations use two different operators.  */
>      bool two_operators;
> +   /* The DEF type of this node.  */
> +   enum vect_def_type def_type;
>    };
>    
>    
> *************** typedef struct _slp_instance {
> *** 139,144 ****
> --- 141,147 ----
>    #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
>    #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
>    #define SLP_TREE_TWO_OPERATORS(S)		 (S)->two_operators
> + #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
>    
>    
>    
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> *** gcc/tree-vect-stmts.c	(revision 231552)
> --- gcc/tree-vect-stmts.c	(working copy)
> *************** vect_is_simple_use (tree operand, vec_in
> *** 8649,8658 ****
>      else
>        {
>          stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> !       if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
> ! 	*dt = vect_external_def;
> !       else
> ! 	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>        }
>    
>      if (dump_enabled_p ())
> --- 8652,8658 ----
>      else
>        {
>          stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> !       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>        }
>    
>      if (dump_enabled_p ())
> Index: gcc/testsuite/g++.dg/torture/pr68852.C
> ===================================================================
> --- gcc/testsuite/g++.dg/torture/pr68852.C	(revision 0)
> +++ gcc/testsuite/g++.dg/torture/pr68852.C	(working copy)
> @@ -0,0 +1,51 @@
> +/* { dg-do compile } */
> +
> +struct A {
> +    double x, y, z, w;
> +    A() {}
> +    A(double, double p2, double p3, double) : y(p2), z(p3) {}
> +    void m_fn1();
> +};
> +
> +struct B {
> +    double x, y;
> +};
> +struct D : A {
> +    D() {}
> +    D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
> +};
> +
> +class C {
> +public:
> +    float _11, _12, _13, _14;
> +    float _21, _22, _23, _24;
> +    float _31, _32, _33, _34;
> +    float _41, _42, _43, _44;
> +    D m_fn2(B p1) {
> +	double z(p1.x + _43);
> +	return *this * D(p1.x, p1.y, z, 1);
> +    }
> +    int ProjectRectBounds_next;
> +    B __trans_tmp_3;
> +    int m_fn3(int) {
> +	B a, b;
> +	D c[1];
> +	b = __trans_tmp_3;
> +	c[2] = m_fn2(b);
> +	c[3] = m_fn2(a);
> +	c[ProjectRectBounds_next].m_fn1();
> +    }
> +    D operator*(D p1) {
> +	D d;
> +	d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
> +	d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
> +	d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
> +	d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
> +	return d;
> +    }
> +};
> +
> +void fn1() {
> +    C e;
> +    int f = e.m_fn3(f);
> +}
> Index: gcc/tree-vect-slp.c
> ===================================================================
> *** gcc/tree-vect-slp.c	(revision 231610)
> --- gcc/tree-vect-slp.c	(working copy)
> *************** vect_free_slp_tree (slp_tree node)
> *** 51,59 ****
>      int i;
>      slp_tree child;
>    
> -   if (!node)
> -     return;
> -
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>        vect_free_slp_tree (child);
>    
> --- 51,56 ----
> *************** vect_create_new_slp_node (vec<gimple *>
> *** 103,108 ****
> --- 100,106 ----
>      SLP_TREE_CHILDREN (node).create (nops);
>      SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
>      SLP_TREE_TWO_OPERATORS (node) = false;
> +   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
>    
>      return node;
>    }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 938,944 ****
>    	      slp_tree grandchild;
>    
>    	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		if (grandchild != NULL)
>    		  break;
>    	      if (!grandchild)
>    		{
> --- 936,942 ----
>    	      slp_tree grandchild;
>    
>    	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>    		  break;
>    	      if (!grandchild)
>    		{
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 946,960 ****
>    		  *max_nunits = old_max_nunits;
>    		  loads->truncate (old_nloads);
>    		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		      vect_free_slp_tree (grandchild);
>    		  SLP_TREE_CHILDREN (child).truncate (0);
>    
>    		  dump_printf_loc (MSG_NOTE, vect_location,
>    				   "Building parent vector operands from "
>    				   "scalars instead\n");
>    		  oprnd_info->def_stmts = vNULL;
> ! 		  vect_free_slp_tree (child);
> ! 		  SLP_TREE_CHILDREN (*node).quick_push (NULL);
>    		  continue;
>    		}
>    	    }
> --- 944,958 ----
>    		  *max_nunits = old_max_nunits;
>    		  loads->truncate (old_nloads);
>    		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		    vect_free_slp_tree (grandchild);
>    		  SLP_TREE_CHILDREN (child).truncate (0);
>    
>    		  dump_printf_loc (MSG_NOTE, vect_location,
>    				   "Building parent vector operands from "
>    				   "scalars instead\n");
>    		  oprnd_info->def_stmts = vNULL;
> ! 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! 		  SLP_TREE_CHILDREN (*node).quick_push (child);
>    		  continue;
>    		}
>    	    }
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 992,999 ****
>    	  dump_printf_loc (MSG_NOTE, vect_location,
>    			   "Building vector operands from scalars\n");
>    	  oprnd_info->def_stmts = vNULL;
> ! 	  vect_free_slp_tree (child);
> ! 	  SLP_TREE_CHILDREN (*node).quick_push (NULL);
>    	  continue;
>    	}
>    
> --- 990,997 ----
>    	  dump_printf_loc (MSG_NOTE, vect_location,
>    			   "Building vector operands from scalars\n");
>    	  oprnd_info->def_stmts = vNULL;
> ! 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! 	  SLP_TREE_CHILDREN (*node).quick_push (child);
>    	  continue;
>    	}
>    
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1044,1049 ****
> --- 1042,1061 ----
>    				   tem, npermutes, &this_tree_size,
>    				   max_tree_size))
>    	    {
> + 	      /* ... so if successful we can apply the operand swapping
> + 		 to the GIMPLE IL.  This is necessary because for example
> + 		 vect_get_slp_defs uses operand indexes and thus expects
> + 		 canonical operand order.  This is also necessary even
> + 		 if we end up building the operand from scalars as
> + 		 we'll continue to process swapped operand two.  */
> + 	      for (j = 0; j < group_size; ++j)
> + 		if (!matches[j])
> + 		  {
> + 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> + 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> + 				       gimple_assign_rhs2_ptr (stmt));
> + 		  }
> +
>    	      /* If we have all children of child built up from scalars then
>    		 just throw that away and build it up this node from scalars.  */
>    	      if (!SLP_TREE_CHILDREN (child).is_empty ())
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1052,1058 ****
>    		  slp_tree grandchild;
>    
>    		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		    if (grandchild != NULL)
>    		      break;
>    		  if (!grandchild)
>    		    {
> --- 1064,1070 ----
>    		  slp_tree grandchild;
>    
>    		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> ! 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>    		      break;
>    		  if (!grandchild)
>    		    {
> *************** vect_build_slp_tree (vec_info *vinfo,
> *** 1067,1089 ****
>    				       "Building parent vector operands from "
>    				       "scalars instead\n");
>    		      oprnd_info->def_stmts = vNULL;
> ! 		      vect_free_slp_tree (child);
> ! 		      SLP_TREE_CHILDREN (*node).quick_push (NULL);
>    		      continue;
>    		    }
>    		}
>    
> - 	      /* ... so if successful we can apply the operand swapping
> - 		 to the GIMPLE IL.  This is necessary because for example
> - 		 vect_get_slp_defs uses operand indexes and thus expects
> - 		 canonical operand order.  */
> - 	      for (j = 0; j < group_size; ++j)
> - 		if (!matches[j])
> - 		  {
> - 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> - 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> - 				       gimple_assign_rhs2_ptr (stmt));
> - 		  }
>    	      oprnd_info->def_stmts = vNULL;
>    	      SLP_TREE_CHILDREN (*node).quick_push (child);
>    	      continue;
> --- 1079,1090 ----
>    				       "Building parent vector operands from "
>    				       "scalars instead\n");
>    		      oprnd_info->def_stmts = vNULL;
> ! 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
> ! 		      SLP_TREE_CHILDREN (*node).quick_push (child);
>    		      continue;
>    		    }
>    		}
>    
>    	      oprnd_info->def_stmts = vNULL;
>    	      SLP_TREE_CHILDREN (*node).quick_push (child);
>    	      continue;
> *************** vect_print_slp_tree (int dump_kind, loca
> *** 1114,1123 ****
>      gimple *stmt;
>      slp_tree child;
>    
> !   if (!node)
> !     return;
> !
> !   dump_printf_loc (dump_kind, loc, "node\n");
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>        {
>          dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> --- 1115,1123 ----
>      gimple *stmt;
>      slp_tree child;
>    
> !   dump_printf_loc (dump_kind, loc, "node%s\n",
> ! 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
> ! 		   ? " (external)" : "");
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>        {
>          dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> *************** vect_mark_slp_stmts (slp_tree node, enum
> *** 1140,1146 ****
>      gimple *stmt;
>      slp_tree child;
>    
> !   if (!node)
>        return;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1140,1146 ----
>      gimple *stmt;
>      slp_tree child;
>    
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>        return;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_mark_slp_stmts_relevant (slp_tree n
> *** 1162,1168 ****
>      stmt_vec_info stmt_info;
>      slp_tree child;
>    
> !   if (!node)
>        return;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> --- 1162,1168 ----
>      stmt_vec_info stmt_info;
>      slp_tree child;
>    
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>        return;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1400,1406 ****
>    			 stmt_vector_for_cost *body_cost_vec,
>    			 unsigned ncopies_for_cost)
>    {
> !   unsigned i;
>      slp_tree child;
>      gimple *stmt, *s;
>      stmt_vec_info stmt_info;
> --- 1400,1406 ----
>    			 stmt_vector_for_cost *body_cost_vec,
>    			 unsigned ncopies_for_cost)
>    {
> !   unsigned i, j;
>      slp_tree child;
>      gimple *stmt, *s;
>      stmt_vec_info stmt_info;
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1409,1415 ****
>    
>      /* Recurse down the SLP tree.  */
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (child)
>          vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>    			       body_cost_vec, ncopies_for_cost);
>    
> --- 1409,1415 ----
>    
>      /* Recurse down the SLP tree.  */
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>          vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>    			       body_cost_vec, ncopies_for_cost);
>    
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1464,1472 ****
> --- 1464,1479 ----
>    	}
>        }
>    
> +   /* Push SLP node def-type to stmts.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
>      /* Scan operands and account for prologue cost of constants/externals.
>         ???  This over-estimates cost for multiple uses and should be
>         re-engineered.  */
> +   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>      lhs = gimple_get_lhs (stmt);
>      for (i = 0; i < gimple_num_ops (stmt); ++i)
>        {
> *************** vect_analyze_slp_cost_1 (slp_instance in
> *** 1489,1494 ****
> --- 1496,1507 ----
>    			      stmt_info, 0, vect_prologue);
>    	}
>        }
> +
> +   /* Restore stmt def-types.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>    }
>    
>    /* Compute the cost for the SLP instance INSTANCE.  */
> *************** vect_analyze_slp_instance (vec_info *vin
> *** 1795,1800 ****
> --- 1808,1840 ----
>                }
>            }
>    
> +       /* If the loads and stores can be handled with load/store-lane
> +          instructions do not generate this SLP instance.  */
> +       if (is_a <loop_vec_info> (vinfo)
> + 	  && loads_permuted
> + 	  && dr && vect_store_lanes_supported (vectype, group_size))
> + 	{
> + 	  slp_tree load_node;
> + 	  FOR_EACH_VEC_ELT (loads, i, load_node)
> + 	    {
> + 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
> + 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> + 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> + 	      if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE (stmt_vinfo),
> + 					       GROUP_SIZE (stmt_vinfo)))
> + 		break;
> + 	    }
> + 	  if (i == loads.length ())
> + 	    {
> + 	      if (dump_enabled_p ())
> + 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> + 				 "Built SLP cancelled: can use "
> + 				 "load/store-lanes\n");
> +               vect_free_slp_instance (new_instance);
> +               return false;
> + 	    }
> + 	}
> +
>          vinfo->slp_instances.safe_push (new_instance);
>    
>          if (dump_enabled_p ())
> *************** vect_detect_hybrid_slp_stmts (slp_tree n
> *** 2004,2010 ****
>        }
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> !     if (child)
>          vect_detect_hybrid_slp_stmts (child, i, stype);
>    }
>    
> --- 2044,2050 ----
>        }
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> !     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
>          vect_detect_hybrid_slp_stmts (child, i, stype);
>    }
>    
> *************** static bool
> *** 2185,2201 ****
>    vect_slp_analyze_node_operations (slp_tree node)
>    {
>      bool dummy;
> !   int i;
>      gimple *stmt;
>      slp_tree child;
>    
> !   if (!node)
>        return true;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>        if (!vect_slp_analyze_node_operations (child))
>          return false;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>        {
>          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> --- 2225,2248 ----
>    vect_slp_analyze_node_operations (slp_tree node)
>    {
>      bool dummy;
> !   int i, j;
>      gimple *stmt;
>      slp_tree child;
>    
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>        return true;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>        if (!vect_slp_analyze_node_operations (child))
>          return false;
>    
> +   /* Push SLP node def-type to stmts.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
> +   bool res = true;
>      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>        {
>          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> *************** vect_slp_analyze_node_operations (slp_tr
> *** 2203,2212 ****
>          gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>    
>          if (!vect_analyze_stmt (stmt, &dummy, node))
> ! 	return false;
>        }
>    
> !   return true;
>    }
>    
>    
> --- 2250,2268 ----
>          gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>    
>          if (!vect_analyze_stmt (stmt, &dummy, node))
> ! 	{
> ! 	  res = false;
> ! 	  break;
> ! 	}
>        }
>    
> !   /* Restore stmt def-types.  */
> !   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> !       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> ! 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> !
> !   return res;
>    }
>    
>    
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2286,2292 ****
>    	    if (!is_gimple_debug (use_stmt)
>    		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>    					     use_stmt)
> ! 		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
>    	      {
>    		(*life)[i] = true;
>    		BREAK_FROM_IMM_USE_STMT (use_iter);
> --- 2342,2348 ----
>    	    if (!is_gimple_debug (use_stmt)
>    		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>    					     use_stmt)
> ! 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
>    	      {
>    		(*life)[i] = true;
>    		BREAK_FROM_IMM_USE_STMT (use_iter);
> *************** vect_bb_slp_scalar_cost (basic_block bb,
> *** 2310,2316 ****
>        }
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (child)
>          scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>    
>      return scalar_cost;
> --- 2366,2372 ----
>        }
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>          scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>    
>      return scalar_cost;
> *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
> *** 2499,2513 ****
>          return NULL;
>        }
>    
> -   /* Mark all the statements that we do not want to vectorize.  */
> -   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
> -        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
> -     {
> -       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
> -       if (STMT_SLP_TYPE (vinfo) != pure_slp)
> - 	STMT_VINFO_VECTORIZABLE (vinfo) = false;
> -     }
> -
>      if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
>    				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
>        {
> --- 2555,2560 ----
> *************** vect_get_slp_defs (vec<tree> ops, slp_tr
> *** 3085,3091 ****
>              child = SLP_TREE_CHILDREN (slp_node)[child_index];
>    
>    	  /* We have to check both pattern and original def, if available.  */
> ! 	  if (child)
>    	    {
>    	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>    	      gimple *related
> --- 3132,3138 ----
>              child = SLP_TREE_CHILDREN (slp_node)[child_index];
>    
>    	  /* We have to check both pattern and original def, if available.  */
> ! 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>    	    {
>    	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>    	      gimple *related
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3374,3388 ****
>      stmt_vec_info stmt_info;
>      unsigned int vec_stmts_size, nunits, group_size;
>      tree vectype;
> !   int i;
>      slp_tree child;
>    
> !   if (!node)
>        return false;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>        vect_schedule_slp_instance (child, instance, vectorization_factor);
>    
>      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>      stmt_info = vinfo_for_stmt (stmt);
>    
> --- 3421,3441 ----
>      stmt_vec_info stmt_info;
>      unsigned int vec_stmts_size, nunits, group_size;
>      tree vectype;
> !   int i, j;
>      slp_tree child;
>    
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>        return false;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>        vect_schedule_slp_instance (child, instance, vectorization_factor);
>    
> +   /* Push SLP node def-type to stmts.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
> +
>      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>      stmt_info = vinfo_for_stmt (stmt);
>    
> *************** vect_schedule_slp_instance (slp_tree nod
> *** 3501,3506 ****
> --- 3554,3566 ----
>    	}
>        }
>      is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
> +
> +   /* Restore stmt def-types.  */
> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> +
>      return is_store;
>    }
>    
> *************** vect_remove_slp_scalar_calls (slp_tree n
> *** 3519,3525 ****
>      tree lhs;
>      stmt_vec_info stmt_info;
>    
> !   if (!node)
>        return;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> --- 3579,3585 ----
>      tree lhs;
>      stmt_vec_info stmt_info;
>    
> !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>        return;
>    
>      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Fix PR68852
  2015-12-17 16:31 ` Kyrill Tkachov
@ 2015-12-18  8:57   ` Richard Biener
  2015-12-18  9:09     ` Kyrill Tkachov
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2015-12-18  8:57 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: gcc-patches

On Thu, 17 Dec 2015, Kyrill Tkachov wrote:

> 
> On 14/12/15 15:14, Richard Biener wrote:
> > The following fixes PR68852 - so I finally needed to sit down and
> > fix the "build-from-scalars" hack in the SLP vectorizer by pretending
> > we'd have a sane vectorizer IL.  Basically I now mark the SLP node
> > with a proper vect_def_type but I have to push that down to the
> > stmt-info level whenever sth would look at it.
> > 
> > It's a bit ugly but not too much yet ;)
> > 
> > Anyway, the proper fix is to have a sane data structure, nothing for
> > GCC 6 though.
> > 
> > Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
> > 
> > Verified SPEC CPU 2006 is happy with the patch.
> 
> Unfortunately it's not very happy on aarch64 ;)
> 416.gamess and the trans.fppized.f in particular ICEs after this patch with
> 
> trans.fppized.f:2086:0:
> 
>        SUBROUTINE TRFMCX(NPRINT,ICORBS,IORBS,IORB,DOFOCK,DOEXCH,
> 
> 
> internal compiler error: in vect_analyze_stmt, at tree-vect-stmts.c:8013
> 0xd34d1b vect_analyze_stmt(gimple*, bool*, _slp_tree*)
>         $SRC/tree-vect-stmts.c:8013
> 0xd4b64a vect_slp_analyze_node_operations
>         $SRC/tree-vect-slp.c:2237
> 0xd4b533 vect_slp_analyze_node_operations
>         $SRC/tree-vect-slp.c:2221
> 0xd4b533 vect_slp_analyze_node_operations
>         $SRC/tree-vect-slp.c:2221
> 0xd4b533 vect_slp_analyze_node_operations
>         $SRC/tree-vect-slp.c:2221
> 0xd4b533 vect_slp_analyze_node_operations
>         $SRC/tree-vect-slp.c:2221
> 0xd4f7dc vect_slp_analyze_operations(vec<_slp_instance*, va_heap, vl_ptr>,
> void*)
>         $SRC/tree-vect-slp.c:2269
> 0xd546a0 vect_slp_analyze_bb_1
>         $SRC/tree-vect-slp.c:2543
> 0xd546a0 vect_slp_bb(basic_block_def*)
>         $SRC/tree-vect-slp.c:2630
> 0xd56985 execute
>         $SRC/tree-vectorizer.c:759
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See <http://gcc.gnu.org/bugs.html> for instructions.
> 
> when using the flags
> -mcpu=cortex-a53+crypto -save-temps -Ofast -fomit-frame-pointer
> -fno-aggressive-loop-optimizations
> 
> I'll open a bug report to keep track of it.

This sounds like PR68946, which I just fixed?

Richard.

> Thanks,
> Kyrill
> 
> > Richard.
> > 
> > 2015-12-14  Richard Biener  <rguenther@suse.de>
> > 
> > 	PR tree-optimization/68852
> > 	* tree-vectorizer.h (struct _slp_tree): Add def_type member.
> > 	(SLP_TREE_DEF_TYPE): New accessor.
> > 	* tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
> > 	hack.
> > 	* tree-vect-slp.c (vect_create_new_slp_node): Initialize
> > 	SLP_TREE_DEF_TYPE.
> > 	(vect_build_slp_tree): When a node is to be built up from scalars
> > 	do not push a NULL as child but instead set its def_type to
> > 	vect_external_def.
> > 	(vect_analyze_slp_cost_1): Check for child def-type instead
> > 	of NULL.
> > 	(vect_detect_hybrid_slp_stmts): Likewise.
> > 	(vect_bb_slp_scalar_cost): Likewise.
> > 	(vect_get_slp_defs): Likewise.
> > 	(vect_slp_analyze_node_operations): Likewise.  Before
> > 	processing node push the children def-types to the underlying
> > 	stmts vinfo and restore it afterwards.
> > 	(vect_schedule_slp_instance): Likewise.
> > 	(vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
> > 	as not vectorizable.
> > 
> > 	* g++.dg/torture/pr68852.C: New testcase.
> > 
> > Index: gcc/tree-vectorizer.h
> > ===================================================================
> > *** gcc/tree-vectorizer.h	(revision 231552)
> > --- gcc/tree-vectorizer.h	(working copy)
> > *************** struct _slp_tree {
> > *** 107,112 ****
> > --- 107,114 ----
> >      unsigned int vec_stmts_size;
> >      /* Whether the scalar computations use two different operators.  */
> >      bool two_operators;
> > +   /* The DEF type of this node.  */
> > +   enum vect_def_type def_type;
> >    };
> >       *************** typedef struct _slp_instance {
> > *** 139,144 ****
> > --- 141,147 ----
> >    #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
> >    #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
> >    #define SLP_TREE_TWO_OPERATORS(S)		 (S)->two_operators
> > + #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
> >          Index: gcc/tree-vect-stmts.c
> > ===================================================================
> > *** gcc/tree-vect-stmts.c	(revision 231552)
> > --- gcc/tree-vect-stmts.c	(working copy)
> > *************** vect_is_simple_use (tree operand, vec_in
> > *** 8649,8658 ****
> >      else
> >        {
> >          stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> > !       if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE
> > (stmt_vinfo))
> > ! 	*dt = vect_external_def;
> > !       else
> > ! 	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> >        }
> >         if (dump_enabled_p ())
> > --- 8652,8658 ----
> >      else
> >        {
> >          stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
> > !       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
> >        }
> >         if (dump_enabled_p ())
> > Index: gcc/testsuite/g++.dg/torture/pr68852.C
> > ===================================================================
> > --- gcc/testsuite/g++.dg/torture/pr68852.C	(revision 0)
> > +++ gcc/testsuite/g++.dg/torture/pr68852.C	(working copy)
> > @@ -0,0 +1,51 @@
> > +/* { dg-do compile } */
> > +
> > +struct A {
> > +    double x, y, z, w;
> > +    A() {}
> > +    A(double, double p2, double p3, double) : y(p2), z(p3) {}
> > +    void m_fn1();
> > +};
> > +
> > +struct B {
> > +    double x, y;
> > +};
> > +struct D : A {
> > +    D() {}
> > +    D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
> > +};
> > +
> > +class C {
> > +public:
> > +    float _11, _12, _13, _14;
> > +    float _21, _22, _23, _24;
> > +    float _31, _32, _33, _34;
> > +    float _41, _42, _43, _44;
> > +    D m_fn2(B p1) {
> > +	double z(p1.x + _43);
> > +	return *this * D(p1.x, p1.y, z, 1);
> > +    }
> > +    int ProjectRectBounds_next;
> > +    B __trans_tmp_3;
> > +    int m_fn3(int) {
> > +	B a, b;
> > +	D c[1];
> > +	b = __trans_tmp_3;
> > +	c[2] = m_fn2(b);
> > +	c[3] = m_fn2(a);
> > +	c[ProjectRectBounds_next].m_fn1();
> > +    }
> > +    D operator*(D p1) {
> > +	D d;
> > +	d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
> > +	d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
> > +	d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
> > +	d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
> > +	return d;
> > +    }
> > +};
> > +
> > +void fn1() {
> > +    C e;
> > +    int f = e.m_fn3(f);
> > +}
> > Index: gcc/tree-vect-slp.c
> > ===================================================================
> > *** gcc/tree-vect-slp.c	(revision 231610)
> > --- gcc/tree-vect-slp.c	(working copy)
> > *************** vect_free_slp_tree (slp_tree node)
> > *** 51,59 ****
> >      int i;
> >      slp_tree child;
> >    -   if (!node)
> > -     return;
> > -
> >      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> >        vect_free_slp_tree (child);
> >    --- 51,56 ----
> > *************** vect_create_new_slp_node (vec<gimple *>
> > *** 103,108 ****
> > --- 100,106 ----
> >      SLP_TREE_CHILDREN (node).create (nops);
> >      SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
> >      SLP_TREE_TWO_OPERATORS (node) = false;
> > +   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
> >         return node;
> >    }
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 938,944 ****
> >    	      slp_tree grandchild;
> >       	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! 		if (grandchild != NULL)
> >    		  break;
> >    	      if (!grandchild)
> >    		{
> > --- 936,942 ----
> >    	      slp_tree grandchild;
> >       	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> >    		  break;
> >    	      if (!grandchild)
> >    		{
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 946,960 ****
> >    		  *max_nunits = old_max_nunits;
> >    		  loads->truncate (old_nloads);
> >    		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> > ! 		      vect_free_slp_tree (grandchild);
> >    		  SLP_TREE_CHILDREN (child).truncate (0);
> >       		  dump_printf_loc (MSG_NOTE, vect_location,
> >    				   "Building parent vector operands from "
> >    				   "scalars instead\n");
> >    		  oprnd_info->def_stmts = vNULL;
> > ! 		  vect_free_slp_tree (child);
> > ! 		  SLP_TREE_CHILDREN (*node).quick_push (NULL);
> >    		  continue;
> >    		}
> >    	    }
> > --- 944,958 ----
> >    		  *max_nunits = old_max_nunits;
> >    		  loads->truncate (old_nloads);
> >    		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
> > ! 		    vect_free_slp_tree (grandchild);
> >    		  SLP_TREE_CHILDREN (child).truncate (0);
> >       		  dump_printf_loc (MSG_NOTE, vect_location,
> >    				   "Building parent vector operands from "
> >    				   "scalars instead\n");
> >    		  oprnd_info->def_stmts = vNULL;
> > ! 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
> > ! 		  SLP_TREE_CHILDREN (*node).quick_push (child);
> >    		  continue;
> >    		}
> >    	    }
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 992,999 ****
> >    	  dump_printf_loc (MSG_NOTE, vect_location,
> >    			   "Building vector operands from scalars\n");
> >    	  oprnd_info->def_stmts = vNULL;
> > ! 	  vect_free_slp_tree (child);
> > ! 	  SLP_TREE_CHILDREN (*node).quick_push (NULL);
> >    	  continue;
> >    	}
> >    --- 990,997 ----
> >    	  dump_printf_loc (MSG_NOTE, vect_location,
> >    			   "Building vector operands from scalars\n");
> >    	  oprnd_info->def_stmts = vNULL;
> > ! 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
> > ! 	  SLP_TREE_CHILDREN (*node).quick_push (child);
> >    	  continue;
> >    	}
> >    *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 1044,1049 ****
> > --- 1042,1061 ----
> >    				   tem, npermutes, &this_tree_size,
> >    				   max_tree_size))
> >    	    {
> > + 	      /* ... so if successful we can apply the operand swapping
> > + 		 to the GIMPLE IL.  This is necessary because for example
> > + 		 vect_get_slp_defs uses operand indexes and thus expects
> > + 		 canonical operand order.  This is also necessary even
> > + 		 if we end up building the operand from scalars as
> > + 		 we'll continue to process swapped operand two.  */
> > + 	      for (j = 0; j < group_size; ++j)
> > + 		if (!matches[j])
> > + 		  {
> > + 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> > + 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> > + 				       gimple_assign_rhs2_ptr (stmt));
> > + 		  }
> > +
> >    	      /* If we have all children of child built up from scalars then
> >    		 just throw that away and build it up this node from scalars.
> > */
> >    	      if (!SLP_TREE_CHILDREN (child).is_empty ())
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 1052,1058 ****
> >    		  slp_tree grandchild;
> >       		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! 		    if (grandchild != NULL)
> >    		      break;
> >    		  if (!grandchild)
> >    		    {
> > --- 1064,1070 ----
> >    		  slp_tree grandchild;
> >       		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
> > grandchild)
> > ! 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
> >    		      break;
> >    		  if (!grandchild)
> >    		    {
> > *************** vect_build_slp_tree (vec_info *vinfo,
> > *** 1067,1089 ****
> >    				       "Building parent vector operands from "
> >    				       "scalars instead\n");
> >    		      oprnd_info->def_stmts = vNULL;
> > ! 		      vect_free_slp_tree (child);
> > ! 		      SLP_TREE_CHILDREN (*node).quick_push (NULL);
> >    		      continue;
> >    		    }
> >    		}
> >    - 	      /* ... so if successful we can apply the operand swapping
> > - 		 to the GIMPLE IL.  This is necessary because for example
> > - 		 vect_get_slp_defs uses operand indexes and thus expects
> > - 		 canonical operand order.  */
> > - 	      for (j = 0; j < group_size; ++j)
> > - 		if (!matches[j])
> > - 		  {
> > - 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
> > - 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
> > - 				       gimple_assign_rhs2_ptr (stmt));
> > - 		  }
> >    	      oprnd_info->def_stmts = vNULL;
> >    	      SLP_TREE_CHILDREN (*node).quick_push (child);
> >    	      continue;
> > --- 1079,1090 ----
> >    				       "Building parent vector operands from "
> >    				       "scalars instead\n");
> >    		      oprnd_info->def_stmts = vNULL;
> > ! 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
> > ! 		      SLP_TREE_CHILDREN (*node).quick_push (child);
> >    		      continue;
> >    		    }
> >    		}
> >       	      oprnd_info->def_stmts = vNULL;
> >    	      SLP_TREE_CHILDREN (*node).quick_push (child);
> >    	      continue;
> > *************** vect_print_slp_tree (int dump_kind, loca
> > *** 1114,1123 ****
> >      gimple *stmt;
> >      slp_tree child;
> >    !   if (!node)
> > !     return;
> > !
> > !   dump_printf_loc (dump_kind, loc, "node\n");
> >      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> >        {
> >          dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> > --- 1115,1123 ----
> >      gimple *stmt;
> >      slp_tree child;
> >    !   dump_printf_loc (dump_kind, loc, "node%s\n",
> > ! 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
> > ! 		   ? " (external)" : "");
> >      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> >        {
> >          dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
> > *************** vect_mark_slp_stmts (slp_tree node, enum
> > *** 1140,1146 ****
> >      gimple *stmt;
> >      slp_tree child;
> >    !   if (!node)
> >        return;
> >         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > --- 1140,1146 ----
> >      gimple *stmt;
> >      slp_tree child;
> >    !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> >        return;
> >         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > *************** vect_mark_slp_stmts_relevant (slp_tree n
> > *** 1162,1168 ****
> >      stmt_vec_info stmt_info;
> >      slp_tree child;
> >    !   if (!node)
> >        return;
> >         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > --- 1162,1168 ----
> >      stmt_vec_info stmt_info;
> >      slp_tree child;
> >    !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> >        return;
> >         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1400,1406 ****
> >    			 stmt_vector_for_cost *body_cost_vec,
> >    			 unsigned ncopies_for_cost)
> >    {
> > !   unsigned i;
> >      slp_tree child;
> >      gimple *stmt, *s;
> >      stmt_vec_info stmt_info;
> > --- 1400,1406 ----
> >    			 stmt_vector_for_cost *body_cost_vec,
> >    			 unsigned ncopies_for_cost)
> >    {
> > !   unsigned i, j;
> >      slp_tree child;
> >      gimple *stmt, *s;
> >      stmt_vec_info stmt_info;
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1409,1415 ****
> >         /* Recurse down the SLP tree.  */
> >      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > !     if (child)
> >          vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> >    			       body_cost_vec, ncopies_for_cost);
> >    --- 1409,1415 ----
> >         /* Recurse down the SLP tree.  */
> >      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> >          vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
> >    			       body_cost_vec, ncopies_for_cost);
> >    *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1464,1472 ****
> > --- 1464,1479 ----
> >    	}
> >        }
> >    +   /* Push SLP node def-type to stmts.  */
> > +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
> > (child);
> > +
> >      /* Scan operands and account for prologue cost of constants/externals.
> >         ???  This over-estimates cost for multiple uses and should be
> >         re-engineered.  */
> > +   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> >      lhs = gimple_get_lhs (stmt);
> >      for (i = 0; i < gimple_num_ops (stmt); ++i)
> >        {
> > *************** vect_analyze_slp_cost_1 (slp_instance in
> > *** 1489,1494 ****
> > --- 1496,1507 ----
> >    			      stmt_info, 0, vect_prologue);
> >    	}
> >        }
> > +
> > +   /* Restore stmt def-types.  */
> > +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> >    }
> >       /* Compute the cost for the SLP instance INSTANCE.  */
> > *************** vect_analyze_slp_instance (vec_info *vin
> > *** 1795,1800 ****
> > --- 1808,1840 ----
> >                }
> >            }
> >    +       /* If the loads and stores can be handled with load/store-lane
> > +          instructions do not generate this SLP instance.  */
> > +       if (is_a <loop_vec_info> (vinfo)
> > + 	  && loads_permuted
> > + 	  && dr && vect_store_lanes_supported (vectype, group_size))
> > + 	{
> > + 	  slp_tree load_node;
> > + 	  FOR_EACH_VEC_ELT (loads, i, load_node)
> > + 	    {
> > + 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
> > + 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
> > + 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
> > + 	      if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE
> > (stmt_vinfo),
> > + 					       GROUP_SIZE (stmt_vinfo)))
> > + 		break;
> > + 	    }
> > + 	  if (i == loads.length ())
> > + 	    {
> > + 	      if (dump_enabled_p ())
> > + 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> > + 				 "Built SLP cancelled: can use "
> > + 				 "load/store-lanes\n");
> > +               vect_free_slp_instance (new_instance);
> > +               return false;
> > + 	    }
> > + 	}
> > +
> >          vinfo->slp_instances.safe_push (new_instance);
> >             if (dump_enabled_p ())
> > *************** vect_detect_hybrid_slp_stmts (slp_tree n
> > *** 2004,2010 ****
> >        }
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> > !     if (child)
> >          vect_detect_hybrid_slp_stmts (child, i, stype);
> >    }
> >    --- 2044,2050 ----
> >        }
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
> > !     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
> >          vect_detect_hybrid_slp_stmts (child, i, stype);
> >    }
> >    *************** static bool
> > *** 2185,2201 ****
> >    vect_slp_analyze_node_operations (slp_tree node)
> >    {
> >      bool dummy;
> > !   int i;
> >      gimple *stmt;
> >      slp_tree child;
> >    !   if (!node)
> >        return true;
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> >        if (!vect_slp_analyze_node_operations (child))
> >          return false;
> >         FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> >        {
> >          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> > --- 2225,2248 ----
> >    vect_slp_analyze_node_operations (slp_tree node)
> >    {
> >      bool dummy;
> > !   int i, j;
> >      gimple *stmt;
> >      slp_tree child;
> >    !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> >        return true;
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> >        if (!vect_slp_analyze_node_operations (child))
> >          return false;
> >    +   /* Push SLP node def-type to stmts.  */
> > +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
> > (child);
> > +
> > +   bool res = true;
> >      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
> >        {
> >          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
> > *************** vect_slp_analyze_node_operations (slp_tr
> > *** 2203,2212 ****
> >          gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
> >             if (!vect_analyze_stmt (stmt, &dummy, node))
> > ! 	return false;
> >        }
> >    !   return true;
> >    }
> >       --- 2250,2268 ----
> >          gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
> >             if (!vect_analyze_stmt (stmt, &dummy, node))
> > ! 	{
> > ! 	  res = false;
> > ! 	  break;
> > ! 	}
> >        }
> >    !   /* Restore stmt def-types.  */
> > !   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > !     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > !       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > ! 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> > !
> > !   return res;
> >    }
> >       *************** vect_bb_slp_scalar_cost (basic_block bb,
> > *** 2286,2292 ****
> >    	    if (!is_gimple_debug (use_stmt)
> >    		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> >    					     use_stmt)
> > ! 		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
> >    	      {
> >    		(*life)[i] = true;
> >    		BREAK_FROM_IMM_USE_STMT (use_iter);
> > --- 2342,2348 ----
> >    	    if (!is_gimple_debug (use_stmt)
> >    		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
> >    					     use_stmt)
> > ! 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
> >    	      {
> >    		(*life)[i] = true;
> >    		BREAK_FROM_IMM_USE_STMT (use_iter);
> > *************** vect_bb_slp_scalar_cost (basic_block bb,
> > *** 2310,2316 ****
> >        }
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > !     if (child)
> >          scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
> >         return scalar_cost;
> > --- 2366,2372 ----
> >        }
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> >          scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
> >         return scalar_cost;
> > *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
> > *** 2499,2513 ****
> >          return NULL;
> >        }
> >    -   /* Mark all the statements that we do not want to vectorize.  */
> > -   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
> > -        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
> > -     {
> > -       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
> > -       if (STMT_SLP_TYPE (vinfo) != pure_slp)
> > - 	STMT_VINFO_VECTORIZABLE (vinfo) = false;
> > -     }
> > -
> >      if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
> >    				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
> >        {
> > --- 2555,2560 ----
> > *************** vect_get_slp_defs (vec<tree> ops, slp_tr
> > *** 3085,3091 ****
> >              child = SLP_TREE_CHILDREN (slp_node)[child_index];
> >       	  /* We have to check both pattern and original def, if
> > available.  */
> > ! 	  if (child)
> >    	    {
> >    	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> >    	      gimple *related
> > --- 3132,3138 ----
> >              child = SLP_TREE_CHILDREN (slp_node)[child_index];
> >       	  /* We have to check both pattern and original def, if
> > available.  */
> > ! 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
> >    	    {
> >    	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
> >    	      gimple *related
> > *************** vect_schedule_slp_instance (slp_tree nod
> > *** 3374,3388 ****
> >      stmt_vec_info stmt_info;
> >      unsigned int vec_stmts_size, nunits, group_size;
> >      tree vectype;
> > !   int i;
> >      slp_tree child;
> >    !   if (!node)
> >        return false;
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> >        vect_schedule_slp_instance (child, instance, vectorization_factor);
> >         stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> >      stmt_info = vinfo_for_stmt (stmt);
> >    --- 3421,3441 ----
> >      stmt_vec_info stmt_info;
> >      unsigned int vec_stmts_size, nunits, group_size;
> >      tree vectype;
> > !   int i, j;
> >      slp_tree child;
> >    !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> >        return false;
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> >        vect_schedule_slp_instance (child, instance, vectorization_factor);
> >    +   /* Push SLP node def-type to stmts.  */
> > +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
> > (child);
> > +
> >      stmt = SLP_TREE_SCALAR_STMTS (node)[0];
> >      stmt_info = vinfo_for_stmt (stmt);
> >    *************** vect_schedule_slp_instance (slp_tree nod
> > *** 3501,3506 ****
> > --- 3554,3566 ----
> >    	}
> >        }
> >      is_store = vect_transform_stmt (stmt, &si, &grouped_store, node,
> > instance);
> > +
> > +   /* Restore stmt def-types.  */
> > +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
> > +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
> > + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
> > +
> >      return is_store;
> >    }
> >    *************** vect_remove_slp_scalar_calls (slp_tree n
> > *** 3519,3525 ****
> >      tree lhs;
> >      stmt_vec_info stmt_info;
> >    !   if (!node)
> >        return;
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > --- 3579,3585 ----
> >      tree lhs;
> >      stmt_vec_info stmt_info;
> >    !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
> >        return;
> >         FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
> > 
> 
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Fix PR68852
  2015-12-18  8:57   ` Richard Biener
@ 2015-12-18  9:09     ` Kyrill Tkachov
  0 siblings, 0 replies; 5+ messages in thread
From: Kyrill Tkachov @ 2015-12-18  9:09 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches


On 18/12/15 08:57, Richard Biener wrote:
> On Thu, 17 Dec 2015, Kyrill Tkachov wrote:
>
>> On 14/12/15 15:14, Richard Biener wrote:
>>> The following fixes PR68852 - so I finally needed to sit down and
>>> fix the "build-from-scalars" hack in the SLP vectorizer by pretending
>>> we'd have a sane vectorizer IL.  Basically I now mark the SLP node
>>> with a proper vect_def_type but I have to push that down to the
>>> stmt-info level whenever sth would look at it.
>>>
>>> It's a bit ugly but not too much yet ;)
>>>
>>> Anyway, the proper fix is to have a sane data structure, nothing for
>>> GCC 6 though.
>>>
>>> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>>>
>>> Verified SPEC CPU 2006 is happy with the patch.
>> Unfortunately it's not very happy on aarch64 ;)
>> 416.gamess and the trans.fppized.f in particular ICEs after this patch with
>>
>> trans.fppized.f:2086:0:
>>
>>         SUBROUTINE TRFMCX(NPRINT,ICORBS,IORBS,IORB,DOFOCK,DOEXCH,
>>
>>
>> internal compiler error: in vect_analyze_stmt, at tree-vect-stmts.c:8013
>> 0xd34d1b vect_analyze_stmt(gimple*, bool*, _slp_tree*)
>>          $SRC/tree-vect-stmts.c:8013
>> 0xd4b64a vect_slp_analyze_node_operations
>>          $SRC/tree-vect-slp.c:2237
>> 0xd4b533 vect_slp_analyze_node_operations
>>          $SRC/tree-vect-slp.c:2221
>> 0xd4b533 vect_slp_analyze_node_operations
>>          $SRC/tree-vect-slp.c:2221
>> 0xd4b533 vect_slp_analyze_node_operations
>>          $SRC/tree-vect-slp.c:2221
>> 0xd4b533 vect_slp_analyze_node_operations
>>          $SRC/tree-vect-slp.c:2221
>> 0xd4f7dc vect_slp_analyze_operations(vec<_slp_instance*, va_heap, vl_ptr>,
>> void*)
>>          $SRC/tree-vect-slp.c:2269
>> 0xd546a0 vect_slp_analyze_bb_1
>>          $SRC/tree-vect-slp.c:2543
>> 0xd546a0 vect_slp_bb(basic_block_def*)
>>          $SRC/tree-vect-slp.c:2630
>> 0xd56985 execute
>>          $SRC/tree-vectorizer.c:759
>> Please submit a full bug report,
>> with preprocessed source if appropriate.
>> Please include the complete backtrace with any bug report.
>> See <http://gcc.gnu.org/bugs.html> for instructions.
>>
>> when using the flags
>> -mcpu=cortex-a53+crypto -save-temps -Ofast -fomit-frame-pointer
>> -fno-aggressive-loop-optimizations
>>
>> I'll open a bug report to keep track of it.
> This sounds like PR68946 which I just fixed?

Looks like it. Latest trunk does not ICE.
Sorry for the noise.

Kyrill

> Richard.
>
>> Thanks,
>> Kyrill
>>
>>> Richard.
>>>
>>> 2015-12-14  Richard Biener  <rguenther@suse.de>
>>>
>>> 	PR tree-optimization/68852
>>> 	* tree-vectorizer.h (struct _slp_tree): Add def_type member.
>>> 	(SLP_TREE_DEF_TYPE): New accessor.
>>> 	* tree-vect-stmts.c (vect_is_simple_use): Remove BB vectorization
>>> 	hack.
>>> 	* tree-vect-slp.c (vect_create_new_slp_node): Initialize
>>> 	SLP_TREE_DEF_TYPE.
>>> 	(vect_build_slp_tree): When a node is to be built up from scalars
>>> 	do not push a NULL as child but instead set its def_type to
>>> 	vect_external_def.
>>> 	(vect_analyze_slp_cost_1): Check for child def-type instead
>>> 	of NULL.
>>> 	(vect_detect_hybrid_slp_stmts): Likewise.
>>> 	(vect_bb_slp_scalar_cost): Likewise.
>>> 	(vect_get_slp_defs): Likewise.
>>> 	(vect_slp_analyze_node_operations): Likewise.  Before
>>> 	processing node push the children def-types to the underlying
>>> 	stmts vinfo and restore it afterwards.
>>> 	(vect_schedule_slp_instance): Likewise.
>>> 	(vect_slp_analyze_bb_1): Do not mark stmts not in SLP instances
>>> 	as not vectorizable.
>>>
>>> 	* g++.dg/torture/pr68852.C: New testcase.
>>>
>>> Index: gcc/tree-vectorizer.h
>>> ===================================================================
>>> *** gcc/tree-vectorizer.h	(revision 231552)
>>> --- gcc/tree-vectorizer.h	(working copy)
>>> *************** struct _slp_tree {
>>> *** 107,112 ****
>>> --- 107,114 ----
>>>       unsigned int vec_stmts_size;
>>>       /* Whether the scalar computations use two different operators.  */
>>>       bool two_operators;
>>> +   /* The DEF type of this node.  */
>>> +   enum vect_def_type def_type;
>>>     };
>>>        *************** typedef struct _slp_instance {
>>> *** 139,144 ****
>>> --- 141,147 ----
>>>     #define SLP_TREE_NUMBER_OF_VEC_STMTS(S)          (S)->vec_stmts_size
>>>     #define SLP_TREE_LOAD_PERMUTATION(S)             (S)->load_permutation
>>>     #define SLP_TREE_TWO_OPERATORS(S)		 (S)->two_operators
>>> + #define SLP_TREE_DEF_TYPE(S)			 (S)->def_type
>>>           Index: gcc/tree-vect-stmts.c
>>> ===================================================================
>>> *** gcc/tree-vect-stmts.c	(revision 231552)
>>> --- gcc/tree-vect-stmts.c	(working copy)
>>> *************** vect_is_simple_use (tree operand, vec_in
>>> *** 8649,8658 ****
>>>       else
>>>         {
>>>           stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
>>> !       if (is_a <bb_vec_info> (vinfo) && !STMT_VINFO_VECTORIZABLE
>>> (stmt_vinfo))
>>> ! 	*dt = vect_external_def;
>>> !       else
>>> ! 	*dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>>>         }
>>>          if (dump_enabled_p ())
>>> --- 8652,8658 ----
>>>       else
>>>         {
>>>           stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
>>> !       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
>>>         }
>>>          if (dump_enabled_p ())
>>> Index: gcc/testsuite/g++.dg/torture/pr68852.C
>>> ===================================================================
>>> --- gcc/testsuite/g++.dg/torture/pr68852.C	(revision 0)
>>> +++ gcc/testsuite/g++.dg/torture/pr68852.C	(working copy)
>>> @@ -0,0 +1,51 @@
>>> +/* { dg-do compile } */
>>> +
>>> +struct A {
>>> +    double x, y, z, w;
>>> +    A() {}
>>> +    A(double, double p2, double p3, double) : y(p2), z(p3) {}
>>> +    void m_fn1();
>>> +};
>>> +
>>> +struct B {
>>> +    double x, y;
>>> +};
>>> +struct D : A {
>>> +    D() {}
>>> +    D(double p1, double p2, double p3, double p4) : A(p1, p2, p3, p4) {}
>>> +};
>>> +
>>> +class C {
>>> +public:
>>> +    float _11, _12, _13, _14;
>>> +    float _21, _22, _23, _24;
>>> +    float _31, _32, _33, _34;
>>> +    float _41, _42, _43, _44;
>>> +    D m_fn2(B p1) {
>>> +	double z(p1.x + _43);
>>> +	return *this * D(p1.x, p1.y, z, 1);
>>> +    }
>>> +    int ProjectRectBounds_next;
>>> +    B __trans_tmp_3;
>>> +    int m_fn3(int) {
>>> +	B a, b;
>>> +	D c[1];
>>> +	b = __trans_tmp_3;
>>> +	c[2] = m_fn2(b);
>>> +	c[3] = m_fn2(a);
>>> +	c[ProjectRectBounds_next].m_fn1();
>>> +    }
>>> +    D operator*(D p1) {
>>> +	D d;
>>> +	d.x = p1.x * _11 + p1.y * _21 + p1.z * _31 + _41;
>>> +	d.y = p1.x * _12 + p1.y * _22 + p1.z * _32 + _42;
>>> +	d.z = p1.x * _13 + p1.y * _23 + p1.z * _33 + _43;
>>> +	d.w = p1.x * _14 + p1.y * _24 + p1.z * _34 + _44;
>>> +	return d;
>>> +    }
>>> +};
>>> +
>>> +void fn1() {
>>> +    C e;
>>> +    int f = e.m_fn3(f);
>>> +}
>>> Index: gcc/tree-vect-slp.c
>>> ===================================================================
>>> *** gcc/tree-vect-slp.c	(revision 231610)
>>> --- gcc/tree-vect-slp.c	(working copy)
>>> *************** vect_free_slp_tree (slp_tree node)
>>> *** 51,59 ****
>>>       int i;
>>>       slp_tree child;
>>>     -   if (!node)
>>> -     return;
>>> -
>>>       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>         vect_free_slp_tree (child);
>>>     --- 51,56 ----
>>> *************** vect_create_new_slp_node (vec<gimple *>
>>> *** 103,108 ****
>>> --- 100,106 ----
>>>       SLP_TREE_CHILDREN (node).create (nops);
>>>       SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
>>>       SLP_TREE_TWO_OPERATORS (node) = false;
>>> +   SLP_TREE_DEF_TYPE (node) = vect_internal_def;
>>>          return node;
>>>     }
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 938,944 ****
>>>     	      slp_tree grandchild;
>>>        	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! 		if (grandchild != NULL)
>>>     		  break;
>>>     	      if (!grandchild)
>>>     		{
>>> --- 936,942 ----
>>>     	      slp_tree grandchild;
>>>        	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! 		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>>>     		  break;
>>>     	      if (!grandchild)
>>>     		{
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 946,960 ****
>>>     		  *max_nunits = old_max_nunits;
>>>     		  loads->truncate (old_nloads);
>>>     		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
>>> ! 		      vect_free_slp_tree (grandchild);
>>>     		  SLP_TREE_CHILDREN (child).truncate (0);
>>>        		  dump_printf_loc (MSG_NOTE, vect_location,
>>>     				   "Building parent vector operands from "
>>>     				   "scalars instead\n");
>>>     		  oprnd_info->def_stmts = vNULL;
>>> ! 		  vect_free_slp_tree (child);
>>> ! 		  SLP_TREE_CHILDREN (*node).quick_push (NULL);
>>>     		  continue;
>>>     		}
>>>     	    }
>>> --- 944,958 ----
>>>     		  *max_nunits = old_max_nunits;
>>>     		  loads->truncate (old_nloads);
>>>     		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
>>> ! 		    vect_free_slp_tree (grandchild);
>>>     		  SLP_TREE_CHILDREN (child).truncate (0);
>>>        		  dump_printf_loc (MSG_NOTE, vect_location,
>>>     				   "Building parent vector operands from "
>>>     				   "scalars instead\n");
>>>     		  oprnd_info->def_stmts = vNULL;
>>> ! 		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
>>> ! 		  SLP_TREE_CHILDREN (*node).quick_push (child);
>>>     		  continue;
>>>     		}
>>>     	    }
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 992,999 ****
>>>     	  dump_printf_loc (MSG_NOTE, vect_location,
>>>     			   "Building vector operands from scalars\n");
>>>     	  oprnd_info->def_stmts = vNULL;
>>> ! 	  vect_free_slp_tree (child);
>>> ! 	  SLP_TREE_CHILDREN (*node).quick_push (NULL);
>>>     	  continue;
>>>     	}
>>>     --- 990,997 ----
>>>     	  dump_printf_loc (MSG_NOTE, vect_location,
>>>     			   "Building vector operands from scalars\n");
>>>     	  oprnd_info->def_stmts = vNULL;
>>> ! 	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
>>> ! 	  SLP_TREE_CHILDREN (*node).quick_push (child);
>>>     	  continue;
>>>     	}
>>>     *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 1044,1049 ****
>>> --- 1042,1061 ----
>>>     				   tem, npermutes, &this_tree_size,
>>>     				   max_tree_size))
>>>     	    {
>>> + 	      /* ... so if successful we can apply the operand swapping
>>> + 		 to the GIMPLE IL.  This is necessary because for example
>>> + 		 vect_get_slp_defs uses operand indexes and thus expects
>>> + 		 canonical operand order.  This is also necessary even
>>> + 		 if we end up building the operand from scalars as
>>> + 		 we'll continue to process swapped operand two.  */
>>> + 	      for (j = 0; j < group_size; ++j)
>>> + 		if (!matches[j])
>>> + 		  {
>>> + 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
>>> + 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
>>> + 				       gimple_assign_rhs2_ptr (stmt));
>>> + 		  }
>>> +
>>>     	      /* If we have all children of child built up from scalars then
>>>     		 just throw that away and build it up this node from scalars.
>>> */
>>>     	      if (!SLP_TREE_CHILDREN (child).is_empty ())
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 1052,1058 ****
>>>     		  slp_tree grandchild;
>>>        		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! 		    if (grandchild != NULL)
>>>     		      break;
>>>     		  if (!grandchild)
>>>     		    {
>>> --- 1064,1070 ----
>>>     		  slp_tree grandchild;
>>>        		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
>>> grandchild)
>>> ! 		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
>>>     		      break;
>>>     		  if (!grandchild)
>>>     		    {
>>> *************** vect_build_slp_tree (vec_info *vinfo,
>>> *** 1067,1089 ****
>>>     				       "Building parent vector operands from "
>>>     				       "scalars instead\n");
>>>     		      oprnd_info->def_stmts = vNULL;
>>> ! 		      vect_free_slp_tree (child);
>>> ! 		      SLP_TREE_CHILDREN (*node).quick_push (NULL);
>>>     		      continue;
>>>     		    }
>>>     		}
>>>     - 	      /* ... so if successful we can apply the operand swapping
>>> - 		 to the GIMPLE IL.  This is necessary because for example
>>> - 		 vect_get_slp_defs uses operand indexes and thus expects
>>> - 		 canonical operand order.  */
>>> - 	      for (j = 0; j < group_size; ++j)
>>> - 		if (!matches[j])
>>> - 		  {
>>> - 		    gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
>>> - 		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
>>> - 				       gimple_assign_rhs2_ptr (stmt));
>>> - 		  }
>>>     	      oprnd_info->def_stmts = vNULL;
>>>     	      SLP_TREE_CHILDREN (*node).quick_push (child);
>>>     	      continue;
>>> --- 1079,1090 ----
>>>     				       "Building parent vector operands from "
>>>     				       "scalars instead\n");
>>>     		      oprnd_info->def_stmts = vNULL;
>>> ! 		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
>>> ! 		      SLP_TREE_CHILDREN (*node).quick_push (child);
>>>     		      continue;
>>>     		    }
>>>     		}
>>>        	      oprnd_info->def_stmts = vNULL;
>>>     	      SLP_TREE_CHILDREN (*node).quick_push (child);
>>>     	      continue;
>>> *************** vect_print_slp_tree (int dump_kind, loca
>>> *** 1114,1123 ****
>>>       gimple *stmt;
>>>       slp_tree child;
>>>     !   if (!node)
>>> !     return;
>>> !
>>> !   dump_printf_loc (dump_kind, loc, "node\n");
>>>       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>>         {
>>>           dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
>>> --- 1115,1123 ----
>>>       gimple *stmt;
>>>       slp_tree child;
>>>     !   dump_printf_loc (dump_kind, loc, "node%s\n",
>>> ! 		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
>>> ! 		   ? " (external)" : "");
>>>       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>>         {
>>>           dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
>>> *************** vect_mark_slp_stmts (slp_tree node, enum
>>> *** 1140,1146 ****
>>>       gimple *stmt;
>>>       slp_tree child;
>>>     !   if (!node)
>>>         return;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> --- 1140,1146 ----
>>>       gimple *stmt;
>>>       slp_tree child;
>>>     !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>>         return;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> *************** vect_mark_slp_stmts_relevant (slp_tree n
>>> *** 1162,1168 ****
>>>       stmt_vec_info stmt_info;
>>>       slp_tree child;
>>>     !   if (!node)
>>>         return;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> --- 1162,1168 ----
>>>       stmt_vec_info stmt_info;
>>>       slp_tree child;
>>>     !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>>         return;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1400,1406 ****
>>>     			 stmt_vector_for_cost *body_cost_vec,
>>>     			 unsigned ncopies_for_cost)
>>>     {
>>> !   unsigned i;
>>>       slp_tree child;
>>>       gimple *stmt, *s;
>>>       stmt_vec_info stmt_info;
>>> --- 1400,1406 ----
>>>     			 stmt_vector_for_cost *body_cost_vec,
>>>     			 unsigned ncopies_for_cost)
>>>     {
>>> !   unsigned i, j;
>>>       slp_tree child;
>>>       gimple *stmt, *s;
>>>       stmt_vec_info stmt_info;
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1409,1415 ****
>>>          /* Recurse down the SLP tree.  */
>>>       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> !     if (child)
>>>           vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>>>     			       body_cost_vec, ncopies_for_cost);
>>>     --- 1409,1415 ----
>>>          /* Recurse down the SLP tree.  */
>>>       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>>>           vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
>>>     			       body_cost_vec, ncopies_for_cost);
>>>     *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1464,1472 ****
>>> --- 1464,1479 ----
>>>     	}
>>>         }
>>>     +   /* Push SLP node def-type to stmts.  */
>>> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
>>> (child);
>>> +
>>>       /* Scan operands and account for prologue cost of constants/externals.
>>>          ???  This over-estimates cost for multiple uses and should be
>>>          re-engineered.  */
>>> +   stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>>>       lhs = gimple_get_lhs (stmt);
>>>       for (i = 0; i < gimple_num_ops (stmt); ++i)
>>>         {
>>> *************** vect_analyze_slp_cost_1 (slp_instance in
>>> *** 1489,1494 ****
>>> --- 1496,1507 ----
>>>     			      stmt_info, 0, vect_prologue);
>>>     	}
>>>         }
>>> +
>>> +   /* Restore stmt def-types.  */
>>> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>>>     }
>>>        /* Compute the cost for the SLP instance INSTANCE.  */
>>> *************** vect_analyze_slp_instance (vec_info *vin
>>> *** 1795,1800 ****
>>> --- 1808,1840 ----
>>>                 }
>>>             }
>>>     +       /* If the loads and stores can be handled with load/store-lane
>>> +          instructions do not generate this SLP instance.  */
>>> +       if (is_a <loop_vec_info> (vinfo)
>>> + 	  && loads_permuted
>>> + 	  && dr && vect_store_lanes_supported (vectype, group_size))
>>> + 	{
>>> + 	  slp_tree load_node;
>>> + 	  FOR_EACH_VEC_ELT (loads, i, load_node)
>>> + 	    {
>>> + 	      gimple *first_stmt = GROUP_FIRST_ELEMENT
>>> + 		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
>>> + 	      stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
>>> + 	      if (! vect_load_lanes_supported (STMT_VINFO_VECTYPE
>>> (stmt_vinfo),
>>> + 					       GROUP_SIZE (stmt_vinfo)))
>>> + 		break;
>>> + 	    }
>>> + 	  if (i == loads.length ())
>>> + 	    {
>>> + 	      if (dump_enabled_p ())
>>> + 		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
>>> + 				 "Built SLP cancelled: can use "
>>> + 				 "load/store-lanes\n");
>>> +               vect_free_slp_instance (new_instance);
>>> +               return false;
>>> + 	    }
>>> + 	}
>>> +
>>>           vinfo->slp_instances.safe_push (new_instance);
>>>              if (dump_enabled_p ())
>>> *************** vect_detect_hybrid_slp_stmts (slp_tree n
>>> *** 2004,2010 ****
>>>         }
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
>>> !     if (child)
>>>           vect_detect_hybrid_slp_stmts (child, i, stype);
>>>     }
>>>     --- 2044,2050 ----
>>>         }
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
>>> !     if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
>>>           vect_detect_hybrid_slp_stmts (child, i, stype);
>>>     }
>>>     *************** static bool
>>> *** 2185,2201 ****
>>>     vect_slp_analyze_node_operations (slp_tree node)
>>>     {
>>>       bool dummy;
>>> !   int i;
>>>       gimple *stmt;
>>>       slp_tree child;
>>>     !   if (!node)
>>>         return true;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>         if (!vect_slp_analyze_node_operations (child))
>>>           return false;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>>         {
>>>           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>>> --- 2225,2248 ----
>>>     vect_slp_analyze_node_operations (slp_tree node)
>>>     {
>>>       bool dummy;
>>> !   int i, j;
>>>       gimple *stmt;
>>>       slp_tree child;
>>>     !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>>         return true;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>         if (!vect_slp_analyze_node_operations (child))
>>>           return false;
>>>     +   /* Push SLP node def-type to stmts.  */
>>> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
>>> (child);
>>> +
>>> +   bool res = true;
>>>       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
>>>         {
>>>           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>>> *************** vect_slp_analyze_node_operations (slp_tr
>>> *** 2203,2212 ****
>>>           gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>>>              if (!vect_analyze_stmt (stmt, &dummy, node))
>>> ! 	return false;
>>>         }
>>>     !   return true;
>>>     }
>>>        --- 2250,2268 ----
>>>           gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
>>>              if (!vect_analyze_stmt (stmt, &dummy, node))
>>> ! 	{
>>> ! 	  res = false;
>>> ! 	  break;
>>> ! 	}
>>>         }
>>>     !   /* Restore stmt def-types.  */
>>> !   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> !     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> !       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> ! 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>>> !
>>> !   return res;
>>>     }
>>>        *************** vect_bb_slp_scalar_cost (basic_block bb,
>>> *** 2286,2292 ****
>>>     	    if (!is_gimple_debug (use_stmt)
>>>     		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>>>     					     use_stmt)
>>> ! 		    || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
>>>     	      {
>>>     		(*life)[i] = true;
>>>     		BREAK_FROM_IMM_USE_STMT (use_iter);
>>> --- 2342,2348 ----
>>>     	    if (!is_gimple_debug (use_stmt)
>>>     		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
>>>     					     use_stmt)
>>> ! 		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
>>>     	      {
>>>     		(*life)[i] = true;
>>>     		BREAK_FROM_IMM_USE_STMT (use_iter);
>>> *************** vect_bb_slp_scalar_cost (basic_block bb,
>>> *** 2310,2316 ****
>>>         }
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> !     if (child)
>>>           scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>>>          return scalar_cost;
>>> --- 2366,2372 ----
>>>         }
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> !     if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>>>           scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
>>>          return scalar_cost;
>>> *************** vect_slp_analyze_bb_1 (gimple_stmt_itera
>>> *** 2499,2513 ****
>>>           return NULL;
>>>         }
>>>     -   /* Mark all the statements that we do not want to vectorize.  */
>>> -   for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
>>> -        gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
>>> -     {
>>> -       stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
>>> -       if (STMT_SLP_TYPE (vinfo) != pure_slp)
>>> - 	STMT_VINFO_VECTORIZABLE (vinfo) = false;
>>> -     }
>>> -
>>>       if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
>>>     				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
>>>         {
>>> --- 2555,2560 ----
>>> *************** vect_get_slp_defs (vec<tree> ops, slp_tr
>>> *** 3085,3091 ****
>>>               child = SLP_TREE_CHILDREN (slp_node)[child_index];
>>>        	  /* We have to check both pattern and original def, if
>>> available.  */
>>> ! 	  if (child)
>>>     	    {
>>>     	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>>>     	      gimple *related
>>> --- 3132,3138 ----
>>>               child = SLP_TREE_CHILDREN (slp_node)[child_index];
>>>        	  /* We have to check both pattern and original def, if
>>> available.  */
>>> ! 	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
>>>     	    {
>>>     	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
>>>     	      gimple *related
>>> *************** vect_schedule_slp_instance (slp_tree nod
>>> *** 3374,3388 ****
>>>       stmt_vec_info stmt_info;
>>>       unsigned int vec_stmts_size, nunits, group_size;
>>>       tree vectype;
>>> !   int i;
>>>       slp_tree child;
>>>     !   if (!node)
>>>         return false;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>         vect_schedule_slp_instance (child, instance, vectorization_factor);
>>>          stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>>>       stmt_info = vinfo_for_stmt (stmt);
>>>     --- 3421,3441 ----
>>>       stmt_vec_info stmt_info;
>>>       unsigned int vec_stmts_size, nunits, group_size;
>>>       tree vectype;
>>> !   int i, j;
>>>       slp_tree child;
>>>     !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>>         return false;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>         vect_schedule_slp_instance (child, instance, vectorization_factor);
>>>     +   /* Push SLP node def-type to stmts.  */
>>> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE
>>> (child);
>>> +
>>>       stmt = SLP_TREE_SCALAR_STMTS (node)[0];
>>>       stmt_info = vinfo_for_stmt (stmt);
>>>     *************** vect_schedule_slp_instance (slp_tree nod
>>> *** 3501,3506 ****
>>> --- 3554,3566 ----
>>>     	}
>>>         }
>>>       is_store = vect_transform_stmt (stmt, &si, &grouped_store, node,
>>> instance);
>>> +
>>> +   /* Restore stmt def-types.  */
>>> +   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> +     if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
>>> +       FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
>>> + 	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
>>> +
>>>       return is_store;
>>>     }
>>>     *************** vect_remove_slp_scalar_calls (slp_tree n
>>> *** 3519,3525 ****
>>>       tree lhs;
>>>       stmt_vec_info stmt_info;
>>>     !   if (!node)
>>>         return;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>> --- 3579,3585 ----
>>>       tree lhs;
>>>       stmt_vec_info stmt_info;
>>>     !   if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
>>>         return;
>>>          FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
>>>
>>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2015-12-18  9:09 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-14 15:14 [PATCH] Fix PR68852 Richard Biener
2015-12-14 15:34 ` Richard Biener
2015-12-17 16:31 ` Kyrill Tkachov
2015-12-18  8:57   ` Richard Biener
2015-12-18  9:09     ` Kyrill Tkachov

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).