From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH] Fix PR84362
Date: Thu, 20 Dec 2018 11:43:00 -0000
Message-ID: <alpine.LSU.2.20.1812201237230.1827@zhemvz.fhfr.qr>


There's a long-standing issue with LIM: it isn't very good at
recognizing the same memory reference when it occurs in different
forms, esp. MEM_REF vs. traditional component-reference form.  The
following rectifies this for the class of refs we can compute
get_addr_base_and_unit_offset on (which makes it easy to create a
canonical ref).
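
For illustration (a reduced example of the kind of mismatch involved,
not taken from the PR), the following two stores touch the same memory
but are not operand_equal_p because their tree shapes differ --
assuming the usual layout where b sits at offset 4:

  struct S { int a; int b; };

  void f (S *s)
  {
    s->b = 1;                      /* traditional COMPONENT_REF form */
    *(int *)((char *)s + 4) = 1;   /* MEM-style form, same location */
  }

At the GIMPLE level these appear as a.b vs. MEM[&a + 4], which is the
pair the new hashing unifies by keying off the ao_ref.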

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2018-12-20  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/84362
	* tree-ssa-loop-im.c: Include alias.h, builtins.h and tree-dfa.h.
	(struct im_mem_ref): Add ref_canonical flag.
	(struct mem_ref_hasher): Use ao_ref as compare_type.
	(mem_ref_hasher::equal): Adjust and add variant comparing ao_ref
	parts.
	(mem_ref_alloc): Take ao_ref parameter, initialize ref_canonical
	member.
	(gather_mem_refs_stmt): Set up ao_ref early and do the lookup
	using it.  If we have non-equal refs canonicalize the one
	in the hashtable used for insertion.
	(tree_ssa_lim_initialize): Adjust.

	* g++.dg/vect/pr84362.cc: New testcase.

Index: gcc/testsuite/g++.dg/vect/pr84362.cc
===================================================================
--- gcc/testsuite/g++.dg/vect/pr84362.cc	(nonexistent)
+++ gcc/testsuite/g++.dg/vect/pr84362.cc	(working copy)
@@ -0,0 +1,28 @@
+// { dg-do compile }
+// { dg-require-effective-target c++11 }
+
+constexpr unsigned int capacity = 1000;
+
+struct vec
+{
+  int values[capacity];
+  unsigned int _size = 0;
+  unsigned int size() const noexcept { return _size; }
+  void push(int x)
+    {
+      values[size()] = x;
+      ++_size;
+    }
+};
+
+int main()
+{
+  vec v;
+  for(unsigned int i{0}; i != capacity; ++i)
+    {
+      v.push(i);
+    }
+  asm volatile("" : : "g"(&v) : "memory");
+}
+
+// { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" { target vect_int } } }
Index: gcc/tree-ssa-loop-im.c
===================================================================
--- gcc/tree-ssa-loop-im.c	(revision 267262)
+++ gcc/tree-ssa-loop-im.c	(working copy)
@@ -45,6 +45,9 @@ along with GCC; see the file COPYING3.
 #include "gimple-fold.h"
 #include "tree-scalar-evolution.h"
 #include "tree-ssa-loop-niter.h"
+#include "alias.h"
+#include "builtins.h"
+#include "tree-dfa.h"
 
 /* TODO:  Support for predicated code motion.  I.e.
 
@@ -112,8 +115,9 @@ struct mem_ref_loc
 
 struct im_mem_ref
 {
-  unsigned id;			/* ID assigned to the memory reference
+  unsigned id : 31;		/* ID assigned to the memory reference
 				   (its index in memory_accesses.refs_list)  */
+  unsigned ref_canonical : 1;   /* Whether mem.ref was canonicalized.  */
   hashval_t hash;		/* Its hash value.  */
 
   /* The memory access itself and associated caching of alias-oracle
@@ -149,9 +153,9 @@ struct im_mem_ref
 
 struct mem_ref_hasher : nofree_ptr_hash <im_mem_ref>
 {
-  typedef tree_node *compare_type;
+  typedef ao_ref *compare_type;
   static inline hashval_t hash (const im_mem_ref *);
-  static inline bool equal (const im_mem_ref *, const tree_node *);
+  static inline bool equal (const im_mem_ref *, const ao_ref *);
 };
 
 /* A hash function for struct im_mem_ref object OBJ.  */
@@ -166,9 +170,19 @@ mem_ref_hasher::hash (const im_mem_ref *
    memory reference OBJ2.  */
 
 inline bool
-mem_ref_hasher::equal (const im_mem_ref *mem1, const tree_node *obj2)
+mem_ref_hasher::equal (const im_mem_ref *mem1, const ao_ref *obj2)
 {
-  return operand_equal_p (mem1->mem.ref, (const_tree) obj2, 0);
+  if (obj2->max_size_known_p ())
+    return (operand_equal_p (mem1->mem.base, obj2->base, 0)
+	    && known_eq (mem1->mem.offset, obj2->offset)
+	    && known_eq (mem1->mem.size, obj2->size)
+	    && known_eq (mem1->mem.max_size, obj2->max_size)
+	    && mem1->mem.volatile_p == obj2->volatile_p
+	    && mem1->mem.ref_alias_set == obj2->ref_alias_set
+	    && types_compatible_p (TREE_TYPE (mem1->mem.ref),
+				   TREE_TYPE (obj2->ref)));
+  else
+    return operand_equal_p (mem1->mem.ref, obj2->ref, 0);
 }
 
 
@@ -1356,11 +1370,15 @@ memref_free (struct im_mem_ref *mem)
    value is HASH and id is ID.  */
 
 static im_mem_ref *
-mem_ref_alloc (tree mem, unsigned hash, unsigned id)
+mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id)
 {
   im_mem_ref *ref = XOBNEW (&mem_ref_obstack, struct im_mem_ref);
-  ao_ref_init (&ref->mem, mem);
+  if (mem)
+    ref->mem = *mem;
+  else
+    ao_ref_init (&ref->mem, error_mark_node);
   ref->id = id;
+  ref->ref_canonical = false;
   ref->hash = hash;
   ref->stored = NULL;
   bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack);
@@ -1436,17 +1454,79 @@ gather_mem_refs_stmt (struct loop *loop,
     }
   else
     {
-      hash = iterative_hash_expr (*mem, 0);
-      slot = memory_accesses.refs->find_slot_with_hash (*mem, hash, INSERT);
+      /* We are looking for equal refs that might differ in structure
+         such as a.b vs. MEM[&a + 4].  So we key off the ao_ref but
+	 make sure we can canonicalize the ref in the hashtable if
+	 non-operand_equal_p refs are found.  For the lookup we mark
+	 the case we want strict equality with aor.max_size == -1.  */
+      ao_ref aor;
+      ao_ref_init (&aor, *mem);
+      ao_ref_base (&aor);
+      ao_ref_alias_set (&aor);
+      HOST_WIDE_INT offset, size, max_size;
+      poly_int64 saved_maxsize = aor.max_size, mem_off;
+      tree mem_base;
+      if (aor.max_size_known_p ()
+	  && aor.offset.is_constant (&offset)
+	  && aor.size.is_constant (&size)
+	  && aor.max_size.is_constant (&max_size)
+	  && size == max_size
+	  && (mem_base = get_addr_base_and_unit_offset (aor.ref, &mem_off)))
+	{
+	  hash = iterative_hash_expr (ao_ref_base (&aor), 0);
+	  hash = iterative_hash_host_wide_int (offset, hash);
+	  hash = iterative_hash_host_wide_int (size, hash);
+	}
+      else
+	{
+	  hash = iterative_hash_expr (aor.ref, 0);
+	  aor.max_size = -1;
+	}
+      slot = memory_accesses.refs->find_slot_with_hash (&aor, hash, INSERT);
+      aor.max_size = saved_maxsize;
       if (*slot)
 	{
+	  if (!(*slot)->ref_canonical
+	      && !operand_equal_p (*mem, (*slot)->mem.ref, 0))
+	    {
+	      /* If we didn't yet canonicalize the hashtable ref (which
+	         we'll end up using for code insertion) and hit a second
+		 equal ref that is not structurally equivalent, create
+		 a canonical ref which is a bare MEM_REF.  */
+	      if (TREE_CODE (*mem) == MEM_REF
+		  || TREE_CODE (*mem) == TARGET_MEM_REF)
+		{
+		  (*slot)->mem.ref = *mem;
+		  (*slot)->mem.base_alias_set = ao_ref_base_alias_set (&aor);
+		}
+	      else
+		{
+		  tree ref_alias_type = reference_alias_ptr_type (*mem);
+		  unsigned int ref_align = get_object_alignment (*mem);
+		  tree ref_type = TREE_TYPE (*mem);
+		  tree tmp = build_fold_addr_expr (unshare_expr (mem_base));
+		  if (TYPE_ALIGN (ref_type) != ref_align)
+		    ref_type = build_aligned_type (ref_type, ref_align);
+		  (*slot)->mem.ref
+		    = fold_build2 (MEM_REF, ref_type, tmp,
+				   build_int_cst (ref_alias_type, mem_off));
+		  if ((*slot)->mem.volatile_p)
+		    TREE_THIS_VOLATILE ((*slot)->mem.ref) = 1;
+		  gcc_checking_assert (TREE_CODE ((*slot)->mem.ref) == MEM_REF
+				       && is_gimple_mem_ref_addr
+				            (TREE_OPERAND ((*slot)->mem.ref,
+							   0)));
+		  (*slot)->mem.base_alias_set = (*slot)->mem.ref_alias_set;
+		}
+	      (*slot)->ref_canonical = true;
+	    }
 	  ref = *slot;
 	  id = ref->id;
 	}
       else
 	{
 	  id = memory_accesses.refs_list.length ();
-	  ref = mem_ref_alloc (*mem, hash, id);
+	  ref = mem_ref_alloc (&aor, hash, id);
 	  memory_accesses.refs_list.safe_push (ref);
 	  *slot = ref;
 
@@ -2472,7 +2552,7 @@ tree_ssa_lim_initialize (void)
   memory_accesses.refs_list.create (100);
   /* Allocate a special, unanalyzable mem-ref with ID zero.  */
   memory_accesses.refs_list.quick_push
-    (mem_ref_alloc (error_mark_node, 0, UNANALYZABLE_MEM_ID));
+    (mem_ref_alloc (NULL, 0, UNANALYZABLE_MEM_ID));
 
   memory_accesses.refs_in_loop.create (number_of_loops (cfun));
   memory_accesses.refs_in_loop.quick_grow (number_of_loops (cfun));
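
To make the lookup scheme in gather_mem_refs_stmt concrete, here is a
minimal, self-contained sketch in plain C++ (not GCC internals; all
names are illustrative): refs are keyed by their resolved (base,
offset, size) triple instead of their syntactic form, so two
differently-written accesses to one location collapse into a single
hash-table slot; the structural comparison remains only as the
fallback for refs that do not decompose.

  #include <cstdio>
  #include <map>
  #include <string>
  #include <tuple>

  /* Stand-in for the (base, offset, size) key the patch hashes on
     when the ao_ref fully decomposes.  */
  typedef std::tuple<std::string, long, long> ref_key;

  int main ()
  {
    std::map<ref_key, int> refs;  /* stands in for memory_accesses.refs */

    /* "s.b": base s, offset 4, size 4 (assuming 4-byte int).  */
    ref_key component_form ("s", 4, 4);
    ++refs[component_form];

    /* "MEM[&s + 4]" decomposes to the same key and reuses the slot.  */
    ref_key mem_form ("s", 4, 4);
    ++refs[mem_form];

    std::printf ("distinct refs: %zu\n", refs.size ());  /* prints 1 */
  }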
