public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Fix PR tree-optimization/77550
@ 2016-09-18 11:18 Bernd Edlinger
  2016-09-19  9:35 ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Bernd Edlinger @ 2016-09-18 11:18 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener

[-- Attachment #1: Type: text/plain, Size: 633 bytes --]

Hi,

this PR shows that in vectorizable_store and vectorizable_load
as well, the vector access always uses the first dr as the alias
type for the whole access.  But that is not right, if they are
different types, like in this example.

So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
by an alias type that is correct for all references in the whole
access group.  With this patch we fall back to ptr_type_node, which
can alias anything, if the group consists of different alias sets.


Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk and gcc-6-branch?


Thanks
Bernd.

[-- Attachment #2: changelog-pr77550.txt --]
[-- Type: text/plain, Size: 393 bytes --]

gcc:
2016-09-18  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR tree-optimization/77550
	* tree-vect-stmts.c (create_array_ref): Change parameters.
	(get_group_alias_ptr_type): New function.
	(vectorizable_store, vectorizable_load): Use get_group_alias_ptr_type.

testsuite:
2016-09-18  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR tree-optimization/77550
	* g++.dg/pr77550.C: New test.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: patch-pr77550.diff --]
[-- Type: text/x-patch; name="patch-pr77550.diff", Size: 21614 bytes --]

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 240213)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -170,11 +170,10 @@ write_vector_array (gimple *stmt, gimple_stmt_iter
    (and its group).  */
 
 static tree
-create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
+create_array_ref (tree type, tree ptr, tree alias_ptr_type)
 {
-  tree mem_ref, alias_ptr_type;
+  tree mem_ref;
 
-  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
   /* Arrays have the same alignment as their type.  */
   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
@@ -5432,6 +5431,37 @@ ensure_base_align (stmt_vec_info stmt_info, struct
 }
 
 
+/* Function get_group_alias_ptr_type.
+
+   Return the alias type for the group starting at FIRST_STMT
+   containing GROUP_SIZE elements.  */
+
+static tree
+get_group_alias_ptr_type (gimple *first_stmt, int group_size)
+{
+  struct data_reference *first_dr, *next_dr;
+  gimple *next_stmt;
+  int i;
+
+  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
+  for (i = 1; i < group_size && next_stmt; i++)
+    {
+      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
+      if (get_alias_set (DR_REF (first_dr))
+	  != get_alias_set (DR_REF (next_dr)))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "conflicting alias set types.\n");
+	  return ptr_type_node;
+	}
+      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+    }
+  return reference_alias_ptr_type (DR_REF (first_dr));
+}
+
+
 /* Function vectorizable_store.
 
    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
@@ -5482,6 +5512,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
   gimple *new_stmt;
   int vf;
   vec_load_store_type vls_type;
+  tree ref_type;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5779,6 +5810,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
       group_size = vec_num = 1;
     }
 
+  ref_type = get_group_alias_ptr_type (first_stmt, group_size);
+
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location,
                      "transform store. ncopies = %d\n", ncopies);
@@ -5804,7 +5837,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 	    (unshare_expr (DR_BASE_ADDRESS (first_dr)),
 	     size_binop (PLUS_EXPR,
 			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
-			 convert_to_ptrofftype (DR_INIT(first_dr))));
+			 convert_to_ptrofftype (DR_INIT (first_dr))));
       stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
 
       /* For a store with loop-invariant (but other than power-of-2)
@@ -5865,7 +5898,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      alias_off = build_int_cst (ref_type, 0);
       next_stmt = first_stmt;
       for (g = 0; g < group_size; g++)
 	{
@@ -6081,11 +6114,10 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 	      && integer_zerop (DR_OFFSET (first_dr))
 	      && integer_zerop (DR_INIT (first_dr))
 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
-					get_alias_set (DR_REF (first_dr))))
+					get_alias_set (TREE_TYPE (ref_type))))
 	    {
 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
-	      dataref_offset = build_int_cst (reference_alias_ptr_type
-					      (DR_REF (first_dr)), 0);
+	      dataref_offset = build_int_cst (ref_type, 0);
 	      inv_p = false;
 	    }
 	  else
@@ -6136,7 +6168,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 
 	  /* Emit:
 	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
-	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
 	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
 	  gimple_call_set_lhs (new_stmt, data_ref);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -6174,8 +6206,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 				      dataref_ptr,
 				      dataref_offset
 				      ? dataref_offset
-				      : build_int_cst (reference_alias_ptr_type
-						       (DR_REF (first_dr)), 0));
+				      : build_int_cst (ref_type, 0));
 	      align = TYPE_ALIGN_UNIT (vectype);
 	      if (aligned_access_p (first_dr))
 		misalign = 0;
@@ -6395,7 +6426,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
   tree dataref_offset = NULL_TREE;
   gimple *ptr_incr = NULL;
   int ncopies;
-  int i, j, group_size = -1, group_gap_adj;
+  int i, j, group_size, group_gap_adj;
   tree msq = NULL_TREE, lsq;
   tree offset = NULL_TREE;
   tree byte_offset = NULL_TREE;
@@ -6417,6 +6448,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
   tree aggr_type;
   gather_scatter_info gs_info;
   vec_info *vinfo = stmt_info->vinfo;
+  tree ref_type;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -6773,11 +6805,20 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
       gcc_assert (!nested_in_vect_loop);
 
       if (slp && grouped_load)
-	first_dr = STMT_VINFO_DATA_REF
-	    (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+	{
+	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+	}
       else
-	first_dr = dr;
+	{
+	  first_stmt = stmt;
+	  first_dr = dr;
+	  group_size = 1;
+	}
 
+      ref_type = get_group_alias_ptr_type (first_stmt, group_size);
+
       stride_base
 	= fold_build_pointer_plus
 	    (DR_BASE_ADDRESS (first_dr),
@@ -6820,7 +6861,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 
       prev_stmt_info = NULL;
       running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      alias_off = build_int_cst (ref_type, 0);
       int nloads = nunits;
       int lnel = 1;
       tree ltype = TREE_TYPE (vectype);
@@ -6969,6 +7010,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
       group_gap_adj = 0;
     }
 
+  ref_type = get_group_alias_ptr_type (first_stmt, group_size);
+
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
   gcc_assert (alignment_support_scheme);
   /* Targets with load-lane instructions must not require explicit
@@ -7127,13 +7170,12 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 	      && integer_zerop (DR_OFFSET (first_dr))
 	      && integer_zerop (DR_INIT (first_dr))
 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
-					get_alias_set (DR_REF (first_dr)))
+					get_alias_set (TREE_TYPE (ref_type)))
 	      && (alignment_support_scheme == dr_aligned
 		  || alignment_support_scheme == dr_unaligned_supported))
 	    {
 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
-	      dataref_offset = build_int_cst (reference_alias_ptr_type
-					      (DR_REF (first_dr)), 0);
+	      dataref_offset = build_int_cst (ref_type, 0);
 	      inv_p = false;
 	    }
 	  else if (first_stmt_for_drptr
@@ -7179,7 +7221,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 
 	  /* Emit:
 	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
-	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
 	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
 	  gimple_call_set_lhs (new_stmt, vec_array);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -7215,8 +7257,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		      = fold_build2 (MEM_REF, vectype, dataref_ptr,
 				     dataref_offset
 				     ? dataref_offset
-				     : build_int_cst (reference_alias_ptr_type
-						      (DR_REF (first_dr)), 0));
+				     : build_int_cst (ref_type, 0));
 		    align = TYPE_ALIGN_UNIT (vectype);
 		    if (alignment_support_scheme == dr_aligned)
 		      {
@@ -7272,8 +7313,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		    data_ref
 		      = build2 (MEM_REF, vectype, ptr,
-				build_int_cst (reference_alias_ptr_type
-						 (DR_REF (first_dr)), 0));
+				build_int_cst (ref_type, 0));
 		    vec_dest = vect_create_destination_var (scalar_dest,
 							    vectype);
 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
@@ -7298,8 +7338,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		    data_ref
 		      = build2 (MEM_REF, vectype, ptr,
-				build_int_cst (reference_alias_ptr_type
-						 (DR_REF (first_dr)), 0));
+				build_int_cst (ref_type, 0));
 		    break;
 		  }
 		case dr_explicit_realign_optimized:
@@ -7315,8 +7354,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		  data_ref
 		    = build2 (MEM_REF, vectype, new_temp,
-			      build_int_cst (reference_alias_ptr_type
-					       (DR_REF (first_dr)), 0));
+			      build_int_cst (ref_type, 0));
 		  break;
 		default:
 		  gcc_unreachable ();
Index: gcc/testsuite/g++.dg/pr77550.C
===================================================================
--- gcc/testsuite/g++.dg/pr77550.C	(revision 0)
+++ gcc/testsuite/g++.dg/pr77550.C	(working copy)
@@ -0,0 +1,295 @@
+// { dg-do run }
+// { dg-options "-std=c++14 -O3" }
+
+namespace std {
+typedef int size_t;
+inline namespace __cxx11 {}
+template <typename...> using _Require = void;
+template <typename> using __void_t = void;
+template <typename, typename, template <typename...> class, typename...>
+struct A {
+  using type = int;
+};
+template <typename _Default, template <typename...> class _Op,
+          typename... _Args>
+struct A<_Default, __void_t<_Op<_Args...>>, _Op, _Args...> {
+  using type = _Op<_Args...>;
+};
+template <typename _Default, template <typename...> class _Op,
+          typename... _Args>
+using __detected_or = A<_Default, void, _Op, _Args...>;
+template <typename _Default, template <typename...> class _Op,
+          typename... _Args>
+using __detected_or_t = typename __detected_or<_Default, _Op, _Args...>::type;
+template <template <typename...> class _Default,
+          template <typename...> class _Op, typename... _Args>
+using __detected_or_t_ = __detected_or_t<_Default<_Args...>, _Op, _Args...>;
+template <typename _InputIterator> void __distance(_InputIterator p1) { ++p1; }
+template <typename _InputIterator>
+void distance(_InputIterator p1, _InputIterator) {
+  __distance(p1);
+}
+template <typename, typename> using __replace_first_arg_t = int;
+struct B {
+  template <typename _Up> using rebind = _Up *;
+};
+template <typename, typename> using __ptr_rebind = B;
+template <typename _Tp> _Tp max(_Tp p1, _Tp) { return p1; }
+}
+void *operator new(unsigned long, void *p2) { return p2; }
+template <typename _Tp> struct C {
+  typedef _Tp *pointer;
+  pointer allocate(int p1) {
+    return static_cast<_Tp *>(operator new(p1 * sizeof(_Tp)));
+  }
+  template <typename _Up> void construct(_Up *p1) { new (p1) _Up; }
+};
+namespace std {
+template <typename _Tp> using __allocator_base = C<_Tp>;
+template <typename _Tp> struct allocator : __allocator_base<_Tp> {
+  typedef unsigned long size_type;
+  template <typename _Tp1> struct rebind { typedef allocator<_Tp1> other; };
+};
+struct D {
+  template <typename _Alloc, typename _Up>
+  using __rebind = typename _Alloc::template rebind<_Up>::other;
+  template <typename _Tp> using __pointer = typename _Tp::pointer;
+  template <typename _Tp> using __c_pointer = typename _Tp::const_pointer;
+  template <typename _Tp> using __size_type = typename _Tp::size_type;
+};
+template <typename _Alloc, typename _Up>
+using __alloc_rebind =
+    __detected_or_t_<__replace_first_arg_t, D::__rebind, _Alloc, _Up>;
+template <typename _Alloc> struct K : D {
+  typedef _Alloc value_type;
+  using pointer = __detected_or_t<value_type, __pointer, _Alloc>;
+  using const_pointer =
+      __detected_or_t<__ptr_rebind<pointer, value_type>, __c_pointer>;
+  using size_type = __detected_or_t<int, __size_type, _Alloc>;
+  template <typename _Tp> using rebind_alloc = __alloc_rebind<_Alloc, _Tp>;
+  template <typename _Tp> static _Require<> _S_construct(_Tp p1) {
+    _Alloc __a;
+    __a.construct(p1);
+  }
+  static pointer allocate(_Alloc p1, size_type p2) { return p1.allocate(p2); }
+  template <typename _Tp, typename _Args>
+  static auto construct(_Alloc, _Tp p2, _Args) {
+    _S_construct(p2);
+  }
+};
+}
+template <typename _Alloc> struct O : std::K<_Alloc> {
+  template <typename _Tp> struct rebind {
+    typedef typename std::K<_Alloc>::template rebind_alloc<_Tp> other;
+  };
+};
+namespace std {
+template <typename _ForwardIterator, typename _Tp, typename _Allocator>
+void __uninitialized_fill_a(_ForwardIterator p1, _ForwardIterator, _Tp,
+                            _Allocator p4) try {
+  O<_Allocator>::construct(p4, p1, 0);
+} catch (...) {
+}
+size_t __deque_buf_size(size_t p1) { return 1 ? 512 / p1 : 0; }
+template <typename _Tp, typename _Ref, typename> struct F {
+  template <typename _Up> using __ptr_to = B::rebind<_Up>;
+  template <typename _CvTp> using __iter = F<_Tp, _CvTp &, __ptr_to<_CvTp>>;
+  typedef __ptr_to<_Tp> _Elt_pointer;
+  typedef __ptr_to<_Elt_pointer> _Map_pointer;
+  _Elt_pointer _M_cur;
+  _Elt_pointer _M_first;
+  _Elt_pointer _M_last;
+  _Map_pointer _M_node;
+  F() {}
+  F(__iter<_Tp> &p1) : _M_cur(p1._M_cur), _M_node(p1._M_node) {}
+  _Ref operator*() { return *_M_cur; }
+  void operator++() {
+    _M_set_node(_M_node + 1);
+    _M_cur = _M_first;
+  }
+  void _M_set_node(_Map_pointer p1) {
+    _M_node = p1;
+    _M_first = *p1;
+    _M_last = _M_first;
+  }
+};
+template <typename _Tp, typename _Ref, typename _Ptr>
+int operator==(F<_Tp, _Ref, _Ptr> p1, F<_Tp, _Ref, _Ptr> p2) {
+  return p1._M_cur == p2._M_cur;
+}
+template <typename _Tp, typename _Ref, typename _Ptr>
+int operator!=(F<_Tp, _Ref, _Ptr> p1, F<_Tp, _Ref, _Ptr> p2) {
+  return !(p1 == p2);
+}
+template <typename _Tp, typename _Alloc> struct _Deque_base {
+  typedef typename _Alloc::template rebind<_Tp>::other _Tp_alloc_type;
+  typedef O<_Tp_alloc_type> _Alloc_traits;
+  typedef typename _Alloc_traits::pointer _Ptr;
+  typedef typename _Alloc_traits::template rebind<_Ptr>::other _Map_alloc_type;
+  typedef F<_Tp, _Tp &, _Ptr> iterator;
+  typedef F<_Tp, _Tp &, typename _Alloc_traits::const_pointer> const_iterator;
+  _Deque_base(_Alloc p1, size_t) : _M_impl(p1) { _M_initialize_map(0); }
+  ~_Deque_base() noexcept;
+  typedef typename iterator::_Map_pointer _Map_pointer;
+  struct L : _Tp_alloc_type {
+    _Map_pointer _M_map;
+    size_t _M_map_size;
+    iterator _M_start;
+    iterator _M_finish;
+    L(_Tp_alloc_type) {}
+  };
+  _Tp_alloc_type _M_get_Tp_allocator() { return _M_impl; }
+  _Ptr _M_allocate_node() { return O<_Tp_alloc_type>::allocate(_M_impl, 1); }
+  _Map_pointer _M_allocate_map(size_t p1) {
+    _Map_alloc_type __map_alloc;
+    return O<_Map_alloc_type>::allocate(__map_alloc, p1);
+  }
+  void _M_initialize_map(size_t);
+  void _M_create_nodes(_Map_pointer, _Map_pointer);
+  enum { _S_initial_map_size = 8 };
+  L _M_impl;
+};
+template <typename _Tp, typename _Alloc>
+_Deque_base<_Tp, _Alloc>::~_Deque_base() noexcept {}
+template <typename _Tp, typename _Alloc>
+void _Deque_base<_Tp, _Alloc>::_M_initialize_map(size_t) {
+  size_t __num_nodes(__deque_buf_size(sizeof(_Tp)));
+  _M_impl._M_map_size = max((size_t)_S_initial_map_size, 0);
+  _M_impl._M_map = _M_allocate_map(_M_impl._M_map_size);
+  _Map_pointer __nstart(_M_impl._M_map);
+  _Map_pointer __nfinish = __nstart + __num_nodes;
+  try {
+    _M_create_nodes(__nstart, __nfinish);
+  } catch (...) {
+  }
+  _M_impl._M_start._M_set_node(__nstart);
+  _M_impl._M_finish._M_set_node(__nfinish - 1);
+  _M_impl._M_start._M_cur = _M_impl._M_start._M_first;
+  _M_impl._M_finish._M_cur = _M_impl._M_finish._M_first;
+}
+template <typename _Tp, typename _Alloc>
+void _Deque_base<_Tp, _Alloc>::_M_create_nodes(_Map_pointer __nstart,
+                                               _Map_pointer __nfinish) {
+  _Map_pointer __cur;
+  try {
+    for (__cur = __nstart; __cur < __nfinish; ++__cur)
+      *__cur = _M_allocate_node();
+  } catch (...) {
+  }
+}
+template <typename _Tp, typename _Alloc = allocator<_Tp>>
+struct deque : _Deque_base<_Tp, _Alloc> {
+  typedef _Deque_base<_Tp, _Alloc> _Base;
+  typedef typename _Base::_Map_pointer _Map_pointer;
+  typedef _Tp value_type;
+  typedef typename _Base::const_iterator const_iterator;
+  typedef size_t size_type;
+  typedef _Alloc allocator_type;
+  using _Base::_M_get_Tp_allocator;
+  deque(size_type, value_type __value, allocator_type __a = allocator_type())
+      : _Base(__a, 0) {
+    _M_fill_initialize(__value);
+  }
+  const_iterator begin() { return this->_M_impl._M_start; }
+  const_iterator end() { return this->_M_impl._M_finish; }
+  void _M_fill_initialize(const value_type &);
+};
+template <typename _Container> auto begin(_Container p1) { return p1.begin(); }
+template <typename _Container> auto end(_Container p1) { return p1.end(); }
+template <typename _Container> auto cbegin(_Container p1) { return begin(p1); }
+template <typename _Container> auto cend(_Container p1) { return end(p1); }
+template <typename _Tp, typename _Alloc>
+void deque<_Tp, _Alloc>::_M_fill_initialize(const value_type &) {
+  _Map_pointer __cur;
+  try {
+    for (__cur = this->_M_impl._M_start._M_node;
+         __cur < this->_M_impl._M_finish._M_node; ++__cur)
+      __uninitialized_fill_a(*__cur, *__cur, 0, _M_get_Tp_allocator());
+  } catch (...) {
+  }
+}
+template <class> struct char_traits;
+namespace __cxx11 {
+template <typename _CharT, typename = char_traits<_CharT>,
+          typename = allocator<_CharT>>
+struct basic_string;
+typedef basic_string<char> string;
+}
+template <> struct char_traits<char> {
+  typedef char char_type;
+  static int compare(char_type, char_type *p2, size_t p3) {
+    return __builtin_memcmp(0, p2, p3);
+  }
+};
+namespace __cxx11 {
+template <typename, typename, typename> struct basic_string {
+  typedef O<allocator<char>> _Alloc_traits;
+  typedef _Alloc_traits::size_type size_type;
+  typedef _Alloc_traits::pointer pointer;
+  struct _Alloc_hider {
+    _Alloc_hider(pointer, allocator<char> && = allocator<char>());
+  } _M_dataplus;
+  size_type _M_string_length;
+  enum { _S_local_capacity = 15 } _M_local_buf[_S_local_capacity];
+  pointer _M_local_data();
+  void _M_set_length(size_type);
+  basic_string() : _M_dataplus(_M_local_data()) { _M_set_length(0); }
+  basic_string(const basic_string &) : _M_dataplus(0) {}
+  size_type size() { return _M_string_length; }
+  char *data() const {}
+};
+}
+template <typename _CharT>
+int operator==(basic_string<_CharT> &p1, const basic_string<_CharT> &p2) {
+  return p1.size() && char_traits<_CharT>::compare(0, p2.data(), p1.size());
+}
+}
+struct G {
+  template <class Facade> static void increment(Facade p1) { p1.increment(); }
+};
+template <class Derived> struct H {
+  Derived derived() { return *static_cast<Derived *>(this); }
+  void operator++() {
+    Derived __trans_tmp_1 = derived();
+    G::increment(__trans_tmp_1);
+  }
+};
+template <class Derived> struct I { typedef H<Derived> type; };
+template <class Derived, class Base> struct M : I<Derived>::type {
+  M(Base p1) : m_iterator(p1) {}
+  Base base() { return m_iterator; }
+  Base &base_reference() { return m_iterator; }
+  Base m_iterator;
+};
+template <class, class> struct N;
+template <class Predicate, class Iterator> struct J {
+  typedef M<N<Predicate, Iterator>, Iterator> type;
+};
+template <class Predicate, class Iterator>
+struct N : J<Predicate, Iterator>::type {
+  typedef typename J<Predicate, Iterator>::type super_t;
+  N(Predicate p1, Iterator p2, Iterator p3)
+      : super_t(p2), m_predicate(p1), m_end(p3) {}
+  void increment() {
+    while (this->base() != m_end && !m_predicate(*this->base()))
+      ++this->base_reference();
+  }
+  Predicate m_predicate;
+  Iterator m_end;
+};
+template <class Predicate, class Iterator>
+N<Predicate, Iterator> make_filter_iterator(Predicate p1, Iterator p2,
+                                            Iterator p3) {
+  return N<Predicate, Iterator>(p1, p2, p3);
+}
+struct Foo {
+  std::string bar;
+};
+int main() {
+  std::deque<Foo> foos(0, {});
+  std::string test;
+  auto p = [test](auto &foo) { return foo.bar == test; };
+  auto begin = make_filter_iterator(p, cbegin(foos), cend(foos));
+  auto end = make_filter_iterator(p, cend(foos), cend(foos));
+  distance(begin, end);
+}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-18 11:18 [PATCH] Fix PR tree-optimization/77550 Bernd Edlinger
@ 2016-09-19  9:35 ` Richard Biener
  2016-09-19 16:20   ` Bernd Edlinger
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2016-09-19  9:35 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: gcc-patches

On Sun, 18 Sep 2016, Bernd Edlinger wrote:

> Hi,
> 
> this PR shows that in vectorizable_store and vectorizable_load
> as well, the vector access always uses the first dr as the alias
> type for the whole access.  But that is not right, if they are
> different types, like in this example.
> 
> So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
> by an alias type that is correct for all references in the whole
> access group.  With this patch we fall back to ptr_type_node, which
> can alias anything, if the group consists of different alias sets.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk and gcc-6-branch?

+/* Function get_group_alias_ptr_type.
+
+   Return the alias type for the group starting at FIRST_STMT
+   containing GROUP_SIZE elements.  */
+
+static tree
+get_group_alias_ptr_type (gimple *first_stmt, int group_size)
+{
+  struct data_reference *first_dr, *next_dr;
+  gimple *next_stmt;
+  int i;
+
+  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
+  for (i = 1; i < group_size && next_stmt; i++)
+    {


there is no need to pass in group_size, it's enough to walk
GROUP_NEXT_ELEMENT until it becomes NULL.

Ok with removing the redundant arg.

Thanks,
Richard.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-19  9:35 ` Richard Biener
@ 2016-09-19 16:20   ` Bernd Edlinger
  2016-09-20  8:17     ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Bernd Edlinger @ 2016-09-19 16:20 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On 09/19/16 11:25, Richard Biener wrote:
> On Sun, 18 Sep 2016, Bernd Edlinger wrote:
>
>> Hi,
>>
>> this PR shows that in vectorizable_store and vectorizable_load
>> as well, the vector access always uses the first dr as the alias
>> type for the whole access.  But that is not right, if they are
>> different types, like in this example.
>>
>> So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
>> by an alias type that is correct for all references in the whole
>> access group.  With this patch we fall back to ptr_type_node, which
>> can alias anything, if the group consists of different alias sets.
>>
>>
>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>> Is it OK for trunk and gcc-6-branch?
>
> +/* Function get_group_alias_ptr_type.
> +
> +   Return the alias type for the group starting at FIRST_STMT
> +   containing GROUP_SIZE elements.  */
> +
> +static tree
> +get_group_alias_ptr_type (gimple *first_stmt, int group_size)
> +{
> +  struct data_reference *first_dr, *next_dr;
> +  gimple *next_stmt;
> +  int i;
> +
> +  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> +  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
> +  for (i = 1; i < group_size && next_stmt; i++)
> +    {
>
>
> there is no need to pass in group_size, it's enough to walk
> GROUP_NEXT_ELEMENT until it becomes NULL.
>
> Ok with removing the redundant arg.
>
> Thanks,
> Richard.
>

Hmmm, I'm afraid this needs one more iteration.


I tried first to check if there are no stmts after the group_size
and noticed there are cases when group_size is 0,
for instance in gcc.dg/torture/pr36244.c.

I think there is a bug in vectorizable_load, here:

  if (grouped_load)
    {
      first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
       /* For SLP vectorization we directly vectorize a subchain
          without permutation.  */
       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
         first_stmt = ;

       group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));

       = 0, and even worse:

       group_gap_adj = vf * group_size - nunits * vec_num;

       = -4 !

apparently GROUP_SIZE is only valid on the GROUP_FIRST_ELEMENT,
while it may be 0 on SLP_TREE_SCALAR_STMTS (slp_node)[0]

moving the GROUP_SIZE up before first_stmt is overwritten
results in no different code....


Bernd.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-19 16:20   ` Bernd Edlinger
@ 2016-09-20  8:17     ` Richard Biener
  2016-09-20 16:12       ` Bernd Edlinger
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2016-09-20  8:17 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: gcc-patches

On Mon, 19 Sep 2016, Bernd Edlinger wrote:

> On 09/19/16 11:25, Richard Biener wrote:
> > On Sun, 18 Sep 2016, Bernd Edlinger wrote:
> >
> >> Hi,
> >>
> >> this PR shows that in vectorizable_store and vectorizable_load
> >> as well, the vector access always uses the first dr as the alias
> >> type for the whole access.  But that is not right, if they are
> >> different types, like in this example.
> >>
> >> So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
> >> by an alias type that is correct for all references in the whole
> >> access group.  With this patch we fall back to ptr_type_node, which
> >> can alias anything, if the group consists of different alias sets.
> >>
> >>
> >> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> >> Is it OK for trunk and gcc-6-branch?
> >
> > +/* Function get_group_alias_ptr_type.
> > +
> > +   Return the alias type for the group starting at FIRST_STMT
> > +   containing GROUP_SIZE elements.  */
> > +
> > +static tree
> > +get_group_alias_ptr_type (gimple *first_stmt, int group_size)
> > +{
> > +  struct data_reference *first_dr, *next_dr;
> > +  gimple *next_stmt;
> > +  int i;
> > +
> > +  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> > +  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
> > +  for (i = 1; i < group_size && next_stmt; i++)
> > +    {
> >
> >
> > there is no need to pass in group_size, it's enough to walk
> > GROUP_NEXT_ELEMENT until it becomes NULL.
> >
> > Ok with removing the redundant arg.
> >
> > Thanks,
> > Richard.
> >
> 
> Hmmm, I'm afraid this needs one more iteration.
> 
> 
> I tried first to check if there are no stmts after the group_size
> and noticed there are cases when group_size is 0,
> for instance in gcc.dg/torture/pr36244.c.
> 
> I think there is a bug in vectorizable_load, here:
> 
>   if (grouped_load)
>     {
>       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>        /* For SLP vectorization we directly vectorize a subchain
>           without permutation.  */
>        if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
>          first_stmt = ;
> 
>        group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> 
>        = 0, and even worse:
> 
>        group_gap_adj = vf * group_size - nunits * vec_num;
> 
>        = -4 !
> 
> apparently GROUP_SIZE is only valid on the GROUP_FIRST_ELEMENT,

Yes.  I'm not sure group_size or group_gap_adj are used in the
slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists () case but moving
the computation up before we re-set first_stmt is probably a good idea.

> while it may be 0 on SLP_TREE_SCALAR_STMTS (slp_node)[0]
> 
> moving the GROUP_SIZE up before first_stmt is overwritten
> results in no different code....

See above - it's eventually unused.  The load/store vectorization code
is quite twisted ;)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-20  8:17     ` Richard Biener
@ 2016-09-20 16:12       ` Bernd Edlinger
  2016-09-21  7:27         ` Richard Biener
  0 siblings, 1 reply; 14+ messages in thread
From: Bernd Edlinger @ 2016-09-20 16:12 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 3568 bytes --]

On 09/20/16 09:41, Richard Biener wrote:
> On Mon, 19 Sep 2016, Bernd Edlinger wrote:
>
>> On 09/19/16 11:25, Richard Biener wrote:
>>> On Sun, 18 Sep 2016, Bernd Edlinger wrote:
>>>
>>>> Hi,
>>>>
>>>> this PR shows that in vectorizable_store and vectorizable_load
>>>> as well, the vector access always uses the first dr as the alias
>>>> type for the whole access.  But that is not right, if they are
>>>> different types, like in this example.
>>>>
>>>> So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
>>>> by an alias type that is correct for all references in the whole
>>>> access group.  With this patch we fall back to ptr_type_node, which
>>>> can alias anything, if the group consists of different alias sets.
>>>>
>>>>
>>>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>>>> Is it OK for trunk and gcc-6-branch?
>>>
>>> +/* Function get_group_alias_ptr_type.
>>> +
>>> +   Return the alias type for the group starting at FIRST_STMT
>>> +   containing GROUP_SIZE elements.  */
>>> +
>>> +static tree
>>> +get_group_alias_ptr_type (gimple *first_stmt, int group_size)
>>> +{
>>> +  struct data_reference *first_dr, *next_dr;
>>> +  gimple *next_stmt;
>>> +  int i;
>>> +
>>> +  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
>>> +  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
>>> +  for (i = 1; i < group_size && next_stmt; i++)
>>> +    {
>>>
>>>
>>> there is no need to pass in group_size, it's enough to walk
>>> GROUP_NEXT_ELEMENT until it becomes NULL.
>>>
>>> Ok with removing the redundant arg.
>>>
>>> Thanks,
>>> Richard.
>>>
>>
>> Hmmm, I'm afraid this needs one more iteration.
>>
>>
>> I tried first to check if there are no stmts after the group_size
>> and noticed there are cases when group_size is 0,
>> for instance in gcc.dg/torture/pr36244.c.
>>
>> I think there is a bug in vectorizable_load, here:
>>
>>    if (grouped_load)
>>      {
>>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>>         /* For SLP vectorization we directly vectorize a subchain
>>            without permutation.  */
>>         if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
>>           first_stmt = ;
>>
>>         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
>>
>>         = 0, and even worse:
>>
>>         group_gap_adj = vf * group_size - nunits * vec_num;
>>
>>         = -4 !
>>
>> apparently GROUP_SIZE is only valid on the GROUP_FIRST_ELEMENT,
>
> Yes.  I'm not sure group_size or group_gap_adj are used in the
> slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists () case but moving
> the computation up before we re-set first_stmt is probably a good idea.
>
>> while it may be 0 on SLP_TREE_SCALAR_STMTS (slp_node)[0]
>>
>> moving the GROUP_SIZE up before first_stmt is overwritten
>> results in no different code....
>
> See above - it's eventually unused.  The load/store vectorization code
> is quite twisted ;)
>

Agreed.

Here is the new version of the patch:

Moved the goups_size up, and everything works fine.

Removed the parameter from get_group_alias_ptr_type.

I think in the case, where first_stmt is not set to
GROUP_FIRST_ELEMENT (stmt_info) but directly to stmt,
it is likely somewhere in a list, so it is not necessary
to walk the GROUP_NEXT_ELEMENT, so I would like to call
reference_alias_ptr_type directly in that case.


Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
Is it OK for trunk and gcc-6 branch?


Thanks
Bernd.

[-- Attachment #2: changelog-pr77550.txt --]
[-- Type: text/plain, Size: 406 bytes --]

gcc:
2016-09-18  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR tree-optimization/77550
	* tree-vect-stmts.c (create_array_ref): Change parameters.
	(get_group_alias_ptr_type): New function.
	(vectorizable_store, vectorizable_load): Use get_group_alias_ptr_type.

testsuite:
2016-09-18  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	PR tree-optimization/77550
	* g++.dg/pr77550.C: New test.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #3: patch-pr77550.diff --]
[-- Type: text/x-patch; name="patch-pr77550.diff", Size: 22624 bytes --]

Index: gcc/tree-vect-stmts.c
===================================================================
--- gcc/tree-vect-stmts.c	(revision 240251)
+++ gcc/tree-vect-stmts.c	(working copy)
@@ -170,11 +170,10 @@ write_vector_array (gimple *stmt, gimple_stmt_iter
    (and its group).  */
 
 static tree
-create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
+create_array_ref (tree type, tree ptr, tree alias_ptr_type)
 {
-  tree mem_ref, alias_ptr_type;
+  tree mem_ref;
 
-  alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
   mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
   /* Arrays have the same alignment as their type.  */
   set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
@@ -5432,6 +5431,35 @@ ensure_base_align (stmt_vec_info stmt_info, struct
 }
 
 
+/* Function get_group_alias_ptr_type.
+
+   Return the alias type for the group starting at FIRST_STMT.  */
+
+static tree
+get_group_alias_ptr_type (gimple *first_stmt)
+{
+  struct data_reference *first_dr, *next_dr;
+  gimple *next_stmt;
+
+  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
+  while (next_stmt)
+    {
+      next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
+      if (get_alias_set (DR_REF (first_dr))
+	  != get_alias_set (DR_REF (next_dr)))
+	{
+	  if (dump_enabled_p ())
+	    dump_printf_loc (MSG_NOTE, vect_location,
+			     "conflicting alias set types.\n");
+	  return ptr_type_node;
+	}
+      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
+    }
+  return reference_alias_ptr_type (DR_REF (first_dr));
+}
+
+
 /* Function vectorizable_store.
 
    Check if STMT defines a non scalar data-ref (array/pointer/structure) that
@@ -5482,6 +5510,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
   gimple *new_stmt;
   int vf;
   vec_load_store_type vls_type;
+  tree ref_type;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -5771,6 +5800,8 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
         /* VEC_NUM is the number of vect stmts to be created for this 
            group.  */
 	vec_num = group_size;
+
+      ref_type = get_group_alias_ptr_type (first_stmt);
     }
   else
     {
@@ -5777,6 +5808,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
       first_stmt = stmt;
       first_dr = dr;
       group_size = vec_num = 1;
+      ref_type = reference_alias_ptr_type (DR_REF (first_dr));
     }
 
   if (dump_enabled_p ())
@@ -5804,7 +5836,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 	    (unshare_expr (DR_BASE_ADDRESS (first_dr)),
 	     size_binop (PLUS_EXPR,
 			 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
-			 convert_to_ptrofftype (DR_INIT(first_dr))));
+			 convert_to_ptrofftype (DR_INIT (first_dr))));
       stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
 
       /* For a store with loop-invariant (but other than power-of-2)
@@ -5865,7 +5897,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
 
       prev_stmt_info = NULL;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      alias_off = build_int_cst (ref_type, 0);
       next_stmt = first_stmt;
       for (g = 0; g < group_size; g++)
 	{
@@ -6081,11 +6113,10 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 	      && integer_zerop (DR_OFFSET (first_dr))
 	      && integer_zerop (DR_INIT (first_dr))
 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
-					get_alias_set (DR_REF (first_dr))))
+					get_alias_set (TREE_TYPE (ref_type))))
 	    {
 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
-	      dataref_offset = build_int_cst (reference_alias_ptr_type
-					      (DR_REF (first_dr)), 0);
+	      dataref_offset = build_int_cst (ref_type, 0);
 	      inv_p = false;
 	    }
 	  else
@@ -6136,7 +6167,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 
 	  /* Emit:
 	       MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY).  */
-	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
 	  new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
 	  gimple_call_set_lhs (new_stmt, data_ref);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -6174,8 +6205,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iter
 				      dataref_ptr,
 				      dataref_offset
 				      ? dataref_offset
-				      : build_int_cst (reference_alias_ptr_type
-						       (DR_REF (first_dr)), 0));
+				      : build_int_cst (ref_type, 0));
 	      align = TYPE_ALIGN_UNIT (vectype);
 	      if (aligned_access_p (first_dr))
 		misalign = 0;
@@ -6395,7 +6425,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
   tree dataref_offset = NULL_TREE;
   gimple *ptr_incr = NULL;
   int ncopies;
-  int i, j, group_size = -1, group_gap_adj;
+  int i, j, group_size, group_gap_adj;
   tree msq = NULL_TREE, lsq;
   tree offset = NULL_TREE;
   tree byte_offset = NULL_TREE;
@@ -6417,6 +6447,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
   tree aggr_type;
   gather_scatter_info gs_info;
   vec_info *vinfo = stmt_info->vinfo;
+  tree ref_type;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -6773,10 +6804,19 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
       gcc_assert (!nested_in_vect_loop);
 
       if (slp && grouped_load)
-	first_dr = STMT_VINFO_DATA_REF
-	    (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
+	{
+	  first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+	  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+	  group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+	  ref_type = get_group_alias_ptr_type (first_stmt);
+	}
       else
-	first_dr = dr;
+	{
+	  first_stmt = stmt;
+	  first_dr = dr;
+	  group_size = 1;
+	  ref_type = reference_alias_ptr_type (DR_REF (first_dr));
+	}
 
       stride_base
 	= fold_build_pointer_plus
@@ -6820,7 +6860,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 
       prev_stmt_info = NULL;
       running_off = offvar;
-      alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
+      alias_off = build_int_cst (ref_type, 0);
       int nloads = nunits;
       int lnel = 1;
       tree ltype = TREE_TYPE (vectype);
@@ -6917,6 +6957,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
   if (grouped_load)
     {
       first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
       /* For SLP vectorization we directly vectorize a subchain
          without permutation.  */
       if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
@@ -6942,7 +6983,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 	  return true;
 	}
       first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
-      group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
       group_gap_adj = 0;
 
       /* VEC_NUM is the number of vect stmts to be created for this group.  */
@@ -6960,6 +7000,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
     	}
       else
 	vec_num = group_size;
+
+      ref_type = get_group_alias_ptr_type (first_stmt);
     }
   else
     {
@@ -6967,6 +7009,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
       first_dr = dr;
       group_size = vec_num = 1;
       group_gap_adj = 0;
+      ref_type = reference_alias_ptr_type (DR_REF (first_dr));
     }
 
   alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
@@ -7127,13 +7170,12 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 	      && integer_zerop (DR_OFFSET (first_dr))
 	      && integer_zerop (DR_INIT (first_dr))
 	      && alias_sets_conflict_p (get_alias_set (aggr_type),
-					get_alias_set (DR_REF (first_dr)))
+					get_alias_set (TREE_TYPE (ref_type)))
 	      && (alignment_support_scheme == dr_aligned
 		  || alignment_support_scheme == dr_unaligned_supported))
 	    {
 	      dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
-	      dataref_offset = build_int_cst (reference_alias_ptr_type
-					      (DR_REF (first_dr)), 0);
+	      dataref_offset = build_int_cst (ref_type, 0);
 	      inv_p = false;
 	    }
 	  else if (first_stmt_for_drptr
@@ -7179,7 +7221,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 
 	  /* Emit:
 	       VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]).  */
-	  data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
+	  data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
 	  new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
 	  gimple_call_set_lhs (new_stmt, vec_array);
 	  vect_finish_stmt_generation (stmt, new_stmt, gsi);
@@ -7215,8 +7257,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		      = fold_build2 (MEM_REF, vectype, dataref_ptr,
 				     dataref_offset
 				     ? dataref_offset
-				     : build_int_cst (reference_alias_ptr_type
-						      (DR_REF (first_dr)), 0));
+				     : build_int_cst (ref_type, 0));
 		    align = TYPE_ALIGN_UNIT (vectype);
 		    if (alignment_support_scheme == dr_aligned)
 		      {
@@ -7272,8 +7313,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		    data_ref
 		      = build2 (MEM_REF, vectype, ptr,
-				build_int_cst (reference_alias_ptr_type
-						 (DR_REF (first_dr)), 0));
+				build_int_cst (ref_type, 0));
 		    vec_dest = vect_create_destination_var (scalar_dest,
 							    vectype);
 		    new_stmt = gimple_build_assign (vec_dest, data_ref);
@@ -7298,8 +7338,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		    vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		    data_ref
 		      = build2 (MEM_REF, vectype, ptr,
-				build_int_cst (reference_alias_ptr_type
-						 (DR_REF (first_dr)), 0));
+				build_int_cst (ref_type, 0));
 		    break;
 		  }
 		case dr_explicit_realign_optimized:
@@ -7315,8 +7354,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_itera
 		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
 		  data_ref
 		    = build2 (MEM_REF, vectype, new_temp,
-			      build_int_cst (reference_alias_ptr_type
-					       (DR_REF (first_dr)), 0));
+			      build_int_cst (ref_type, 0));
 		  break;
 		default:
 		  gcc_unreachable ();
Index: gcc/testsuite/g++.dg/pr77550.C
===================================================================
--- gcc/testsuite/g++.dg/pr77550.C	(revision 0)
+++ gcc/testsuite/g++.dg/pr77550.C	(working copy)
@@ -0,0 +1,295 @@
+// { dg-do run }
+// { dg-options "-std=c++14 -O3" }
+
+namespace std {
+typedef int size_t;
+inline namespace __cxx11 {}
+template <typename...> using _Require = void;
+template <typename> using __void_t = void;
+template <typename, typename, template <typename...> class, typename...>
+struct A {
+  using type = int;
+};
+template <typename _Default, template <typename...> class _Op,
+          typename... _Args>
+struct A<_Default, __void_t<_Op<_Args...>>, _Op, _Args...> {
+  using type = _Op<_Args...>;
+};
+template <typename _Default, template <typename...> class _Op,
+          typename... _Args>
+using __detected_or = A<_Default, void, _Op, _Args...>;
+template <typename _Default, template <typename...> class _Op,
+          typename... _Args>
+using __detected_or_t = typename __detected_or<_Default, _Op, _Args...>::type;
+template <template <typename...> class _Default,
+          template <typename...> class _Op, typename... _Args>
+using __detected_or_t_ = __detected_or_t<_Default<_Args...>, _Op, _Args...>;
+template <typename _InputIterator> void __distance(_InputIterator p1) { ++p1; }
+template <typename _InputIterator>
+void distance(_InputIterator p1, _InputIterator) {
+  __distance(p1);
+}
+template <typename, typename> using __replace_first_arg_t = int;
+struct B {
+  template <typename _Up> using rebind = _Up *;
+};
+template <typename, typename> using __ptr_rebind = B;
+template <typename _Tp> _Tp max(_Tp p1, _Tp) { return p1; }
+}
+void *operator new(unsigned long, void *p2) { return p2; }
+template <typename _Tp> struct C {
+  typedef _Tp *pointer;
+  pointer allocate(int p1) {
+    return static_cast<_Tp *>(operator new(p1 * sizeof(_Tp)));
+  }
+  template <typename _Up> void construct(_Up *p1) { new (p1) _Up; }
+};
+namespace std {
+template <typename _Tp> using __allocator_base = C<_Tp>;
+template <typename _Tp> struct allocator : __allocator_base<_Tp> {
+  typedef unsigned long size_type;
+  template <typename _Tp1> struct rebind { typedef allocator<_Tp1> other; };
+};
+struct D {
+  template <typename _Alloc, typename _Up>
+  using __rebind = typename _Alloc::template rebind<_Up>::other;
+  template <typename _Tp> using __pointer = typename _Tp::pointer;
+  template <typename _Tp> using __c_pointer = typename _Tp::const_pointer;
+  template <typename _Tp> using __size_type = typename _Tp::size_type;
+};
+template <typename _Alloc, typename _Up>
+using __alloc_rebind =
+    __detected_or_t_<__replace_first_arg_t, D::__rebind, _Alloc, _Up>;
+template <typename _Alloc> struct K : D {
+  typedef _Alloc value_type;
+  using pointer = __detected_or_t<value_type, __pointer, _Alloc>;
+  using const_pointer =
+      __detected_or_t<__ptr_rebind<pointer, value_type>, __c_pointer>;
+  using size_type = __detected_or_t<int, __size_type, _Alloc>;
+  template <typename _Tp> using rebind_alloc = __alloc_rebind<_Alloc, _Tp>;
+  template <typename _Tp> static _Require<> _S_construct(_Tp p1) {
+    _Alloc __a;
+    __a.construct(p1);
+  }
+  static pointer allocate(_Alloc p1, size_type p2) { return p1.allocate(p2); }
+  template <typename _Tp, typename _Args>
+  static auto construct(_Alloc, _Tp p2, _Args) {
+    _S_construct(p2);
+  }
+};
+}
+template <typename _Alloc> struct O : std::K<_Alloc> {
+  template <typename _Tp> struct rebind {
+    typedef typename std::K<_Alloc>::template rebind_alloc<_Tp> other;
+  };
+};
+namespace std {
+template <typename _ForwardIterator, typename _Tp, typename _Allocator>
+void __uninitialized_fill_a(_ForwardIterator p1, _ForwardIterator, _Tp,
+                            _Allocator p4) try {
+  O<_Allocator>::construct(p4, p1, 0);
+} catch (...) {
+}
+size_t __deque_buf_size(size_t p1) { return 1 ? 512 / p1 : 0; }
+template <typename _Tp, typename _Ref, typename> struct F {
+  template <typename _Up> using __ptr_to = B::rebind<_Up>;
+  template <typename _CvTp> using __iter = F<_Tp, _CvTp &, __ptr_to<_CvTp>>;
+  typedef __ptr_to<_Tp> _Elt_pointer;
+  typedef __ptr_to<_Elt_pointer> _Map_pointer;
+  _Elt_pointer _M_cur;
+  _Elt_pointer _M_first;
+  _Elt_pointer _M_last;
+  _Map_pointer _M_node;
+  F() {}
+  F(__iter<_Tp> &p1) : _M_cur(p1._M_cur), _M_node(p1._M_node) {}
+  _Ref operator*() { return *_M_cur; }
+  void operator++() {
+    _M_set_node(_M_node + 1);
+    _M_cur = _M_first;
+  }
+  void _M_set_node(_Map_pointer p1) {
+    _M_node = p1;
+    _M_first = *p1;
+    _M_last = _M_first;
+  }
+};
+template <typename _Tp, typename _Ref, typename _Ptr>
+int operator==(F<_Tp, _Ref, _Ptr> p1, F<_Tp, _Ref, _Ptr> p2) {
+  return p1._M_cur == p2._M_cur;
+}
+template <typename _Tp, typename _Ref, typename _Ptr>
+int operator!=(F<_Tp, _Ref, _Ptr> p1, F<_Tp, _Ref, _Ptr> p2) {
+  return !(p1 == p2);
+}
+template <typename _Tp, typename _Alloc> struct _Deque_base {
+  typedef typename _Alloc::template rebind<_Tp>::other _Tp_alloc_type;
+  typedef O<_Tp_alloc_type> _Alloc_traits;
+  typedef typename _Alloc_traits::pointer _Ptr;
+  typedef typename _Alloc_traits::template rebind<_Ptr>::other _Map_alloc_type;
+  typedef F<_Tp, _Tp &, _Ptr> iterator;
+  typedef F<_Tp, _Tp &, typename _Alloc_traits::const_pointer> const_iterator;
+  _Deque_base(_Alloc p1, size_t) : _M_impl(p1) { _M_initialize_map(0); }
+  ~_Deque_base() noexcept;
+  typedef typename iterator::_Map_pointer _Map_pointer;
+  struct L : _Tp_alloc_type {
+    _Map_pointer _M_map;
+    size_t _M_map_size;
+    iterator _M_start;
+    iterator _M_finish;
+    L(_Tp_alloc_type) {}
+  };
+  _Tp_alloc_type _M_get_Tp_allocator() { return _M_impl; }
+  _Ptr _M_allocate_node() { return O<_Tp_alloc_type>::allocate(_M_impl, 1); }
+  _Map_pointer _M_allocate_map(size_t p1) {
+    _Map_alloc_type __map_alloc;
+    return O<_Map_alloc_type>::allocate(__map_alloc, p1);
+  }
+  void _M_initialize_map(size_t);
+  void _M_create_nodes(_Map_pointer, _Map_pointer);
+  enum { _S_initial_map_size = 8 };
+  L _M_impl;
+};
+template <typename _Tp, typename _Alloc>
+_Deque_base<_Tp, _Alloc>::~_Deque_base() noexcept {}
+template <typename _Tp, typename _Alloc>
+void _Deque_base<_Tp, _Alloc>::_M_initialize_map(size_t) {
+  size_t __num_nodes(__deque_buf_size(sizeof(_Tp)));
+  _M_impl._M_map_size = max((size_t)_S_initial_map_size, 0);
+  _M_impl._M_map = _M_allocate_map(_M_impl._M_map_size);
+  _Map_pointer __nstart(_M_impl._M_map);
+  _Map_pointer __nfinish = __nstart + __num_nodes;
+  try {
+    _M_create_nodes(__nstart, __nfinish);
+  } catch (...) {
+  }
+  _M_impl._M_start._M_set_node(__nstart);
+  _M_impl._M_finish._M_set_node(__nfinish - 1);
+  _M_impl._M_start._M_cur = _M_impl._M_start._M_first;
+  _M_impl._M_finish._M_cur = _M_impl._M_finish._M_first;
+}
+template <typename _Tp, typename _Alloc>
+void _Deque_base<_Tp, _Alloc>::_M_create_nodes(_Map_pointer __nstart,
+                                               _Map_pointer __nfinish) {
+  _Map_pointer __cur;
+  try {
+    for (__cur = __nstart; __cur < __nfinish; ++__cur)
+      *__cur = _M_allocate_node();
+  } catch (...) {
+  }
+}
+template <typename _Tp, typename _Alloc = allocator<_Tp>>
+struct deque : _Deque_base<_Tp, _Alloc> {
+  typedef _Deque_base<_Tp, _Alloc> _Base;
+  typedef typename _Base::_Map_pointer _Map_pointer;
+  typedef _Tp value_type;
+  typedef typename _Base::const_iterator const_iterator;
+  typedef size_t size_type;
+  typedef _Alloc allocator_type;
+  using _Base::_M_get_Tp_allocator;
+  deque(size_type, value_type __value, allocator_type __a = allocator_type())
+      : _Base(__a, 0) {
+    _M_fill_initialize(__value);
+  }
+  const_iterator begin() { return this->_M_impl._M_start; }
+  const_iterator end() { return this->_M_impl._M_finish; }
+  void _M_fill_initialize(const value_type &);
+};
+template <typename _Container> auto begin(_Container p1) { return p1.begin(); }
+template <typename _Container> auto end(_Container p1) { return p1.end(); }
+template <typename _Container> auto cbegin(_Container p1) { return begin(p1); }
+template <typename _Container> auto cend(_Container p1) { return end(p1); }
+template <typename _Tp, typename _Alloc>
+void deque<_Tp, _Alloc>::_M_fill_initialize(const value_type &) {
+  _Map_pointer __cur;
+  try {
+    for (__cur = this->_M_impl._M_start._M_node;
+         __cur < this->_M_impl._M_finish._M_node; ++__cur)
+      __uninitialized_fill_a(*__cur, *__cur, 0, _M_get_Tp_allocator());
+  } catch (...) {
+  }
+}
+template <class> struct char_traits;
+namespace __cxx11 {
+template <typename _CharT, typename = char_traits<_CharT>,
+          typename = allocator<_CharT>>
+struct basic_string;
+typedef basic_string<char> string;
+}
+template <> struct char_traits<char> {
+  typedef char char_type;
+  static int compare(char_type, char_type *p2, size_t p3) {
+    return __builtin_memcmp(0, p2, p3);
+  }
+};
+namespace __cxx11 {
+template <typename, typename, typename> struct basic_string {
+  typedef O<allocator<char>> _Alloc_traits;
+  typedef _Alloc_traits::size_type size_type;
+  typedef _Alloc_traits::pointer pointer;
+  struct _Alloc_hider {
+    _Alloc_hider(pointer, allocator<char> && = allocator<char>());
+  } _M_dataplus;
+  size_type _M_string_length;
+  enum { _S_local_capacity = 15 } _M_local_buf[_S_local_capacity];
+  pointer _M_local_data();
+  void _M_set_length(size_type);
+  basic_string() : _M_dataplus(_M_local_data()) { _M_set_length(0); }
+  basic_string(const basic_string &) : _M_dataplus(0) {}
+  size_type size() { return _M_string_length; }
+  char *data() const {}
+};
+}
+template <typename _CharT>
+int operator==(basic_string<_CharT> &p1, const basic_string<_CharT> &p2) {
+  return p1.size() && char_traits<_CharT>::compare(0, p2.data(), p1.size());
+}
+}
+struct G {
+  template <class Facade> static void increment(Facade p1) { p1.increment(); }
+};
+template <class Derived> struct H {
+  Derived derived() { return *static_cast<Derived *>(this); }
+  void operator++() {
+    Derived __trans_tmp_1 = derived();
+    G::increment(__trans_tmp_1);
+  }
+};
+template <class Derived> struct I { typedef H<Derived> type; };
+template <class Derived, class Base> struct M : I<Derived>::type {
+  M(Base p1) : m_iterator(p1) {}
+  Base base() { return m_iterator; }
+  Base &base_reference() { return m_iterator; }
+  Base m_iterator;
+};
+template <class, class> struct N;
+template <class Predicate, class Iterator> struct J {
+  typedef M<N<Predicate, Iterator>, Iterator> type;
+};
+template <class Predicate, class Iterator>
+struct N : J<Predicate, Iterator>::type {
+  typedef typename J<Predicate, Iterator>::type super_t;
+  N(Predicate p1, Iterator p2, Iterator p3)
+      : super_t(p2), m_predicate(p1), m_end(p3) {}
+  void increment() {
+    while (this->base() != m_end && !m_predicate(*this->base()))
+      ++this->base_reference();
+  }
+  Predicate m_predicate;
+  Iterator m_end;
+};
+template <class Predicate, class Iterator>
+N<Predicate, Iterator> make_filter_iterator(Predicate p1, Iterator p2,
+                                            Iterator p3) {
+  return N<Predicate, Iterator>(p1, p2, p3);
+}
+struct Foo {
+  std::string bar;
+};
+int main() {
+  std::deque<Foo> foos(0, {});
+  std::string test;
+  auto p = [test](auto &foo) { return foo.bar == test; };
+  auto begin = make_filter_iterator(p, cbegin(foos), cend(foos));
+  auto end = make_filter_iterator(p, cend(foos), cend(foos));
+  distance(begin, end);
+}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-20 16:12       ` Bernd Edlinger
@ 2016-09-21  7:27         ` Richard Biener
  2016-09-21 20:19           ` Christophe Lyon
  0 siblings, 1 reply; 14+ messages in thread
From: Richard Biener @ 2016-09-21  7:27 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: gcc-patches

On Tue, 20 Sep 2016, Bernd Edlinger wrote:

> On 09/20/16 09:41, Richard Biener wrote:
> > On Mon, 19 Sep 2016, Bernd Edlinger wrote:
> >
> >> On 09/19/16 11:25, Richard Biener wrote:
> >>> On Sun, 18 Sep 2016, Bernd Edlinger wrote:
> >>>
> >>>> Hi,
> >>>>
> >>>> this PR shows that in vectorizable_store and vectorizable_load
> >>>> as well, the vector access always uses the first dr as the alias
> >>>> type for the whole access.  But that is not right, if they are
> >>>> different types, like in this example.
> >>>>
> >>>> So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
> >>>> by an alias type that is correct for all references in the whole
> >>>> access group.  With this patch we fall back to ptr_type_node, which
> >>>> can alias anything, if the group consists of different alias sets.
> >>>>
> >>>>
> >>>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> >>>> Is it OK for trunk and gcc-6-branch?
> >>>
> >>> +/* Function get_group_alias_ptr_type.
> >>> +
> >>> +   Return the alias type for the group starting at FIRST_STMT
> >>> +   containing GROUP_SIZE elements.  */
> >>> +
> >>> +static tree
> >>> +get_group_alias_ptr_type (gimple *first_stmt, int group_size)
> >>> +{
> >>> +  struct data_reference *first_dr, *next_dr;
> >>> +  gimple *next_stmt;
> >>> +  int i;
> >>> +
> >>> +  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
> >>> +  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
> >>> +  for (i = 1; i < group_size && next_stmt; i++)
> >>> +    {
> >>>
> >>>
> >>> there is no need to pass in group_size, it's enough to walk
> >>> GROUP_NEXT_ELEMENT until it becomes NULL.
> >>>
> >>> Ok with removing the redundant arg.
> >>>
> >>> Thanks,
> >>> Richard.
> >>>
> >>
> >> Hmmm, I'm afraid this needs one more iteration.
> >>
> >>
> >> I tried first to check if there are no stmts after the group_size
> >> and noticed there are cases when group_size is 0,
> >> for instance in gcc.dg/torture/pr36244.c.
> >>
> >> I think there is a bug in vectorizable_load, here:
> >>
> >>    if (grouped_load)
> >>      {
> >>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
> >>         /* For SLP vectorization we directly vectorize a subchain
> >>            without permutation.  */
> >>         if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
> >>           first_stmt = ;
> >>
> >>         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
> >>
> >>         = 0, and even worse:
> >>
> >>         group_gap_adj = vf * group_size - nunits * vec_num;
> >>
> >>         = -4 !
> >>
> >> apparently GROUP_SIZE is only valid on the GROUP_FIRST_ELEMENT,
> >
> > Yes.  I'm not sure group_size or group_gap_adj are used in the
> > slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists () case but moving
> > the computation up before we re-set first_stmt is probably a good idea.
> >
> >> while it may be 0 on SLP_TREE_SCALAR_STMTS (slp_node)[0]
> >>
> >> moving the GROUP_SIZE up before first_stmt is overwritten
> >> results in no different code....
> >
> > See above - it's eventually unused.  The load/store vectorization code
> > is quite twisted ;)
> >
> 
> Agreed.
> 
> Here is the new version of the patch:
> 
> Moved the goups_size up, and everything works fine.
> 
> Removed the parameter from get_group_alias_ptr_type.
> 
> I think in the case, where first_stmt is not set to
> GROUP_FIRST_ELEMENT (stmt_info) but directly to stmt,
> it is likely somewhere in a list, so it is not necessary
> to walk the GROUP_NEXT_ELEMENT, so I would like to call
> reference_alias_ptr_type directly in that case.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk and gcc-6 branch?

Ok for trunk and gcc-6 branch after a while.

Richard.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-21  7:27         ` Richard Biener
@ 2016-09-21 20:19           ` Christophe Lyon
  2016-09-21 20:47             ` Bernd Edlinger
  0 siblings, 1 reply; 14+ messages in thread
From: Christophe Lyon @ 2016-09-21 20:19 UTC (permalink / raw)
  To: Richard Biener; +Cc: Bernd Edlinger, gcc-patches

On 21 September 2016 at 09:16, Richard Biener <rguenther@suse.de> wrote:
> On Tue, 20 Sep 2016, Bernd Edlinger wrote:
>
>> On 09/20/16 09:41, Richard Biener wrote:
>> > On Mon, 19 Sep 2016, Bernd Edlinger wrote:
>> >
>> >> On 09/19/16 11:25, Richard Biener wrote:
>> >>> On Sun, 18 Sep 2016, Bernd Edlinger wrote:
>> >>>
>> >>>> Hi,
>> >>>>
>> >>>> this PR shows that in vectorizable_store and vectorizable_load
>> >>>> as well, the vector access always uses the first dr as the alias
>> >>>> type for the whole access.  But that is not right, if they are
>> >>>> different types, like in this example.
>> >>>>
>> >>>> So I tried to replace all reference_alias_ptr_type (DR_REF (first_dr))
>> >>>> by an alias type that is correct for all references in the whole
>> >>>> access group.  With this patch we fall back to ptr_type_node, which
>> >>>> can alias anything, if the group consists of different alias sets.
>> >>>>
>> >>>>
>> >>>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>> >>>> Is it OK for trunk and gcc-6-branch?
>> >>>
>> >>> +/* Function get_group_alias_ptr_type.
>> >>> +
>> >>> +   Return the alias type for the group starting at FIRST_STMT
>> >>> +   containing GROUP_SIZE elements.  */
>> >>> +
>> >>> +static tree
>> >>> +get_group_alias_ptr_type (gimple *first_stmt, int group_size)
>> >>> +{
>> >>> +  struct data_reference *first_dr, *next_dr;
>> >>> +  gimple *next_stmt;
>> >>> +  int i;
>> >>> +
>> >>> +  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
>> >>> +  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
>> >>> +  for (i = 1; i < group_size && next_stmt; i++)
>> >>> +    {
>> >>>
>> >>>
>> >>> there is no need to pass in group_size, it's enough to walk
>> >>> GROUP_NEXT_ELEMENT until it becomes NULL.
>> >>>
>> >>> Ok with removing the redundant arg.
>> >>>
>> >>> Thanks,
>> >>> Richard.
>> >>>
>> >>
>> >> Hmmm, I'm afraid this needs one more iteration.
>> >>
>> >>
>> >> I tried first to check if there are no stmts after the group_size
>> >> and noticed there are cases when group_size is 0,
>> >> for instance in gcc.dg/torture/pr36244.c.
>> >>
>> >> I think there is a bug in vectorizable_load, here:
>> >>
>> >>    if (grouped_load)
>> >>      {
>> >>        first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
>> >>         /* For SLP vectorization we directly vectorize a subchain
>> >>            without permutation.  */
>> >>         if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
>> >>           first_stmt = ;
>> >>
>> >>         group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
>> >>
>> >>         = 0, and even worse:
>> >>
>> >>         group_gap_adj = vf * group_size - nunits * vec_num;
>> >>
>> >>         = -4 !
>> >>
>> >> apparently GROUP_SIZE is only valid on the GROUP_FIRST_ELEMENT,
>> >
>> > Yes.  I'm not sure group_size or group_gap_adj are used in the
>> > slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists () case but moving
>> > the computation up before we re-set first_stmt is probably a good idea.
>> >
>> >> while it may be 0 on SLP_TREE_SCALAR_STMTS (slp_node)[0]
>> >>
>> >> moving the GROUP_SIZE up before first_stmt is overwritten
>> >> results in no different code....
>> >
>> > See above - it's eventually unused.  The load/store vectorization code
>> > is quite twisted ;)
>> >
>>
>> Agreed.
>>
>> Here is the new version of the patch:
>>
>> Moved the goups_size up, and everything works fine.
>>
>> Removed the parameter from get_group_alias_ptr_type.
>>
>> I think in the case, where first_stmt is not set to
>> GROUP_FIRST_ELEMENT (stmt_info) but directly to stmt,
>> it is likely somewhere in a list, so it is not necessary
>> to walk the GROUP_NEXT_ELEMENT, so I would like to call
>> reference_alias_ptr_type directly in that case.
>>
>>
>> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
>> Is it OK for trunk and gcc-6 branch?
>
> Ok for trunk and gcc-6 branch after a while.
>
> Richard.


Hi,

The new testcase pr77550.C fails on arm:
/testsuite/g++.dg/pr77550.C:39:43: error: 'operator new' takes type
'size_t' ('unsigned int') as first parameter [-fpermissive]
compiler exited with status 1

Christophe

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-21 20:19           ` Christophe Lyon
@ 2016-09-21 20:47             ` Bernd Edlinger
  2016-09-22  7:44               ` Richard Biener
  2016-09-22 19:56               ` Christophe Lyon
  0 siblings, 2 replies; 14+ messages in thread
From: Bernd Edlinger @ 2016-09-21 20:47 UTC (permalink / raw)
  To: Christophe Lyon, Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 379 bytes --]

On 09/21/16 21:57, Christophe Lyon wrote:
> Hi,
>
> The new testcase pr77550.C fails on arm:
> /testsuite/g++.dg/pr77550.C:39:43: error: 'operator new' takes type
> 'size_t' ('unsigned int') as first parameter [-fpermissive]
> compiler exited with status 1
>
> Christophe
>

Yes, I see, thanks.  This should fix it.

OK for trunk, gcc-6 after a while ?


Bernd.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch-pr77550-1.diff --]
[-- Type: text/x-patch; name="patch-pr77550-1.diff", Size: 1054 bytes --]

2016-09-21  Bernd Edlinger  <bernd.edlinger@hotmail.de>

	* g++.dg/pr77550.C: Use __SIZE_TYPE__.

Index: gcc/testsuite/g++.dg/pr77550.C
===================================================================
--- gcc/testsuite/g++.dg/pr77550.C	(revision 240330)
+++ gcc/testsuite/g++.dg/pr77550.C	(working copy)
@@ -36,7 +36,7 @@ struct B {
 template <typename, typename> using __ptr_rebind = B;
 template <typename _Tp> _Tp max(_Tp p1, _Tp) { return p1; }
 }
-void *operator new(unsigned long, void *p2) { return p2; }
+void *operator new(__SIZE_TYPE__, void *p2) { return p2; }
 template <typename _Tp> struct C {
   typedef _Tp *pointer;
   pointer allocate(int p1) {
@@ -47,7 +47,7 @@ template <typename _Tp> struct C {
 namespace std {
 template <typename _Tp> using __allocator_base = C<_Tp>;
 template <typename _Tp> struct allocator : __allocator_base<_Tp> {
-  typedef unsigned long size_type;
+  typedef __SIZE_TYPE__ size_type;
   template <typename _Tp1> struct rebind { typedef allocator<_Tp1> other; };
 };
 struct D {

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-21 20:47             ` Bernd Edlinger
@ 2016-09-22  7:44               ` Richard Biener
  2016-09-22 19:56               ` Christophe Lyon
  1 sibling, 0 replies; 14+ messages in thread
From: Richard Biener @ 2016-09-22  7:44 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: Christophe Lyon, gcc-patches

On Wed, 21 Sep 2016, Bernd Edlinger wrote:

> On 09/21/16 21:57, Christophe Lyon wrote:
> > Hi,
> >
> > The new testcase pr77550.C fails on arm:
> > /testsuite/g++.dg/pr77550.C:39:43: error: 'operator new' takes type
> > 'size_t' ('unsigned int') as first parameter [-fpermissive]
> > compiler exited with status 1
> >
> > Christophe
> >
> 
> Yes, I see, thanks.  This should fix it.
> 
> OK for trunk, gcc-6 after a while ?

Ok.

Richard.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-21 20:47             ` Bernd Edlinger
  2016-09-22  7:44               ` Richard Biener
@ 2016-09-22 19:56               ` Christophe Lyon
  2016-09-22 20:37                 ` Bernd Edlinger
  1 sibling, 1 reply; 14+ messages in thread
From: Christophe Lyon @ 2016-09-22 19:56 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: Richard Biener, gcc-patches

On 21 September 2016 at 22:20, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
> On 09/21/16 21:57, Christophe Lyon wrote:
>> Hi,
>>
>> The new testcase pr77550.C fails on arm:
>> /testsuite/g++.dg/pr77550.C:39:43: error: 'operator new' takes type
>> 'size_t' ('unsigned int') as first parameter [-fpermissive]
>> compiler exited with status 1
>>
>> Christophe
>>
>
> Yes, I see, thanks.  This should fix it.
>
> OK for trunk, gcc-6 after a while ?
>
>
> Bernd.

Hi,

The test now compiles and runs OK on most arm configurations, but
on arm-none-eabi --with-cpu=cortex-a9, execution fails:
qemu: uncaught target signal 11 (Segmentation fault) - core dumped
FAIL: g++.dg/pr77550.C   execution test

And on arm-none-eabi with default cpu, the test still does not compile.

Christophe

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-22 19:56               ` Christophe Lyon
@ 2016-09-22 20:37                 ` Bernd Edlinger
  2016-09-26 20:09                   ` Christophe Lyon
  0 siblings, 1 reply; 14+ messages in thread
From: Bernd Edlinger @ 2016-09-22 20:37 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Richard Biener, gcc-patches

On 09/22/16 21:30, Christophe Lyon wrote:
> On 21 September 2016 at 22:20, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
>> On 09/21/16 21:57, Christophe Lyon wrote:
>>> Hi,
>>>
>>> The new testcase pr77550.C fails on arm:
>>> /testsuite/g++.dg/pr77550.C:39:43: error: 'operator new' takes type
>>> 'size_t' ('unsigned int') as first parameter [-fpermissive]
>>> compiler exited with status 1
>>>
>>> Christophe
>>>
>>
>> Yes, I see, thanks.  This should fix it.
>>
>> OK for trunk, gcc-6 after a while ?
>>
>>
>> Bernd.
>
> Hi,
>
> The test now compiles and runs OK on most arm configurations, but
> on arm-none-eabi --with-cpu=cortex-a9, execution fails:
> qemu: uncaught target signal 11 (Segmentation fault) - core dumped
> FAIL: g++.dg/pr77550.C   execution test
>
> And on arm-none-eabi with default cpu, the test still does not compile.
>
> Christophe
>

Hmm.  This test case is hardly worth it, because on my armv7hf target
it doesn't even reproduce the original problem :(

Could you send me the exact compile-flags and the error message
please?


Bernd.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-22 20:37                 ` Bernd Edlinger
@ 2016-09-26 20:09                   ` Christophe Lyon
  2016-09-26 20:21                     ` Bernd Edlinger
  0 siblings, 1 reply; 14+ messages in thread
From: Christophe Lyon @ 2016-09-26 20:09 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: Richard Biener, gcc-patches

On 22 September 2016 at 22:30, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
> On 09/22/16 21:30, Christophe Lyon wrote:
>> On 21 September 2016 at 22:20, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
>>> On 09/21/16 21:57, Christophe Lyon wrote:
>>>> Hi,
>>>>
>>>> The new testcase pr77550.C fails on arm:
>>>> /testsuite/g++.dg/pr77550.C:39:43: error: 'operator new' takes type
>>>> 'size_t' ('unsigned int') as first parameter [-fpermissive]
>>>> compiler exited with status 1
>>>>
>>>> Christophe
>>>>
>>>
>>> Yes, I see, thanks.  This should fix it.
>>>
>>> OK for trunk, gcc-6 after a while ?
>>>
>>>
>>> Bernd.
>>
>> Hi,
>>
>> The test now compiles and runs OK on most arm configurations, but
>> on arm-none-eabi --with-cpu=cortex-a9, execution fails:
>> qemu: uncaught target signal 11 (Segmentation fault) - core dumped
>> FAIL: g++.dg/pr77550.C   execution test
>>
>> And on arm-none-eabi with default cpu, the test still does not compile.
>>
>> Christophe
>>
>
> Hmm.  This test case is hardly worth it, because on my armv7hf target
> it doesn't even reproduce the original problem :(
>
> Could you send me the exact compile-flags and the error message
> please?
>

Sorry for the delay, I've been travelling.

Compilation is:
.../xg++ -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/gcc/testsuite/g++/../../
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr77550.C
g++_tg.o -fno-diagnostics-show-caret -fdiagnostics-color=never
-nostdinc++ -I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/libstdc++-v3/include/arm-none-eabi
-I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/libstdc++-v3/include
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/libsupc++
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/include/backward
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/testsuite/util
-fmessage-length=0 -std=c++14 -O3
-L/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/./libstdc++-v3/src/.libs
-B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/./libstdc++-v3/src/.libs
-Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort -lm -o
./pr77550.exe

Then, during execution, I have:
qemu: uncaught target signal 11 (Segmentation fault) - core dumped
and that's all

My gcc configuration flags are:
--target=arm-none-eabi --disable-libgomp --disable-libmudflap
--disable-libcilkrts --enable-checking --enable-languages=c,c++
--with-newlib --disable-tls --with-mode=arm --with-cpu=cortex-a9

Christophe


>
> Bernd.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-26 20:09                   ` Christophe Lyon
@ 2016-09-26 20:21                     ` Bernd Edlinger
  2016-09-27  5:00                       ` Christophe Lyon
  0 siblings, 1 reply; 14+ messages in thread
From: Bernd Edlinger @ 2016-09-26 20:21 UTC (permalink / raw)
  To: Christophe Lyon; +Cc: Richard Biener, gcc-patches

On 09/26/16 21:19, Christophe Lyon wrote:
>
> Sorry for the delay, I've been travelling.
>
> Compilation is:
> .../xg++ -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/gcc/testsuite/g++/../../
> /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr77550.C
> g++_tg.o -fno-diagnostics-show-caret -fdiagnostics-color=never
> -nostdinc++ -I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/libstdc++-v3/include/arm-none-eabi
> -I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/libstdc++-v3/include
> -I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/libsupc++
> -I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/include/backward
> -I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/testsuite/util
> -fmessage-length=0 -std=c++14 -O3
> -L/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/./libstdc++-v3/src/.libs
> -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/./libstdc++-v3/src/.libs
> -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort -lm -o
> ./pr77550.exe
>
> Then, during execution, I have:
> qemu: uncaught target signal 11 (Segmentation fault) - core dumped
> and that's all
>
> My gcc configuration flags are:
> --target=arm-none-eabi --disable-libgomp --disable-libmudflap
> --disable-libcilkrts --enable-checking --enable-languages=c,c++
> --with-newlib --disable-tls --with-mode=arm --with-cpu=cortex-a9
>
> Christophe
>

Interesting.  At this point it might be OK to open a new PR.

So far, I cannot reproduce, but I used also cortex-a9 but
target=arm-linux-gnueabihf.

Could you please start the command above manually and add
-save-temps ?


Thanks
Bernd.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] Fix PR tree-optimization/77550
  2016-09-26 20:21                     ` Bernd Edlinger
@ 2016-09-27  5:00                       ` Christophe Lyon
  0 siblings, 0 replies; 14+ messages in thread
From: Christophe Lyon @ 2016-09-27  5:00 UTC (permalink / raw)
  To: Bernd Edlinger; +Cc: Richard Biener, gcc-patches

On 26 September 2016 at 22:09, Bernd Edlinger <bernd.edlinger@hotmail.de> wrote:
> On 09/26/16 21:19, Christophe Lyon wrote:
>>
>> Sorry for the delay, I've been travelling.
>>
>> Compilation is:
>> .../xg++ -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/gcc/testsuite/g++/../../
>> /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr77550.C
>> g++_tg.o -fno-diagnostics-show-caret -fdiagnostics-color=never
>> -nostdinc++ -I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/libstdc++-v3/include/arm-none-eabi
>> -I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/libstdc++-v3/include
>> -I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/libsupc++
>> -I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/include/backward
>> -I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/testsuite/util
>> -fmessage-length=0 -std=c++14 -O3
>> -L/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/./libstdc++-v3/src/.libs
>> -B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-eabi/gcc3/arm-none-eabi/./libstdc++-v3/src/.libs
>> -Wl,-wrap,exit -Wl,-wrap,_exit -Wl,-wrap,main -Wl,-wrap,abort -lm -o
>> ./pr77550.exe
>>
>> Then, during execution, I have:
>> qemu: uncaught target signal 11 (Segmentation fault) - core dumped
>> and that's all
>>
>> My gcc configuration flags are:
>> --target=arm-none-eabi --disable-libgomp --disable-libmudflap
>> --disable-libcilkrts --enable-checking --enable-languages=c,c++
>> --with-newlib --disable-tls --with-mode=arm --with-cpu=cortex-a9
>>
>> Christophe
>>
>
> Interesting.  At this point it might be OK to open a new PR.
>
OK, this is https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77748

> So far, I cannot reproduce, but I used also cortex-a9 but
> target=arm-linux-gnueabihf.
>
It does work for me with this configuration.
arm-non-eabi makes the difference.

> Could you please start the command above manually and add
> -save-temps ?
>
>
> Thanks
> Bernd.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2016-09-27  1:18 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-18 11:18 [PATCH] Fix PR tree-optimization/77550 Bernd Edlinger
2016-09-19  9:35 ` Richard Biener
2016-09-19 16:20   ` Bernd Edlinger
2016-09-20  8:17     ` Richard Biener
2016-09-20 16:12       ` Bernd Edlinger
2016-09-21  7:27         ` Richard Biener
2016-09-21 20:19           ` Christophe Lyon
2016-09-21 20:47             ` Bernd Edlinger
2016-09-22  7:44               ` Richard Biener
2016-09-22 19:56               ` Christophe Lyon
2016-09-22 20:37                 ` Bernd Edlinger
2016-09-26 20:09                   ` Christophe Lyon
2016-09-26 20:21                     ` Bernd Edlinger
2016-09-27  5:00                       ` Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).