public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] let hash-based containers work with non-trivial types (PR 90923)
       [not found] <b30b052e-97d1-ce1d-d4c2-816b0a05f2c1@gmail.com>
@ 2019-06-19  3:17 ` Martin Sebor
       [not found] ` <CAFiYyc1scx6AxPi+ME77GB_5aNJcKrQPuugBts+7E2gnkxzPCA@mail.gmail.com>
  1 sibling, 0 replies; 4+ messages in thread
From: Martin Sebor @ 2019-06-19  3:17 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 788 bytes --]

Let me try that again to the right list.

On 6/18/19 9:14 PM, Martin Sebor wrote:
> Bug 90923 shows that even though GCC hash-table based containers
> like hash_map can be instantiated on types with user-defined ctors
> and dtors they invoke the dtors of such types without invoking
> the corresponding ctors.
> 
> It was thanks to this bug that I spent a day debugging "interesting"
> miscompilations during GCC bootstrap (in fairness, it was that and
> bug 90904 about auto_vec copy assignment/construction also being
> hosed even for POD types).
> 
> The attached patch corrects the hash_map and hash_set templates
> to invoke the ctors of the elements they insert and makes them
> (hopefully) safe to use with non-trivial user-defined types.
> 
> Tested on x86_64-linux.
> 
> Martin


[-- Attachment #2: gcc-90923.diff --]
[-- Type: text/x-patch, Size: 8915 bytes --]

PR middle-end/90923 - hash_map destroys elements without constructing them

gcc/ChangeLog:

	PR middle-end/90923
	* hash-map.h (hash_map::put): On insertion invoke element ctor.
	(hash_map::get_or_insert): Same.  Reformat comment.
	* hash-set.h (hash_set::add): On insertion invoke element ctor.
	* hash-map-tests.c (test_map_of_type_with_ctor_and_dtor): New.
 	* hash-set-tests.c (test_map_of_type_with_ctor_and_dtor): New.

diff --git a/gcc/hash-map-tests.c b/gcc/hash-map-tests.c
index b79c7821684..9b365ef1480 100644
--- a/gcc/hash-map-tests.c
+++ b/gcc/hash-map-tests.c
@@ -103,12 +103,98 @@ test_map_of_strings_to_int ()
   ASSERT_EQ (1, string_map.elements ());
 }
 
+typedef struct hash_map_test_val_t
+{
+  static int ndefault;
+  static int ncopy;
+  static int nassign;
+  static int ndtor;
+
+  hash_map_test_val_t ()
+    : ptr (&ptr)
+  {
+    ++ndefault;
+  }
+
+  hash_map_test_val_t (const hash_map_test_val_t &)
+    : ptr (&ptr)
+  {
+    ++ncopy;
+  }
+
+  hash_map_test_val_t& operator= (const hash_map_test_val_t &)
+    {
+     ++nassign;
+     return *this;
+    }
+
+  ~hash_map_test_val_t ()
+    {
+     gcc_assert (ptr == &ptr);
+     ++ndtor;
+    }
+
+  void *ptr;
+} val_t;
+
+int val_t::ndefault;
+int val_t::ncopy;
+int val_t::nassign;
+int val_t::ndtor;
+
+static void
+test_map_of_type_with_ctor_and_dtor ()
+{
+  typedef hash_map <void *, val_t> Map;
+
+  {
+    Map m;
+    (void)&m;
+  }
+
+  ASSERT_TRUE (val_t::ndefault == 0);
+  ASSERT_TRUE (val_t::ncopy == 0);
+  ASSERT_TRUE (val_t::nassign == 0);
+  ASSERT_TRUE (val_t::ndtor == 0);
+
+  {
+    Map m;
+    void *p = &p;
+    m.get_or_insert (p);
+  }
+
+  ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor);
+
+  {
+    Map m;
+    void *p = &p, *q = &q;
+    val_t &v1 = m.get_or_insert (p);
+    val_t &v2 = m.get_or_insert (q);
+
+    ASSERT_TRUE (v1.ptr == &v1.ptr && &v2.ptr == v2.ptr);
+  }
+
+  ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor);
+
+  {
+    Map m;
+    void *p = &p, *q = &q;
+    m.get_or_insert (p);
+    m.remove (p);
+    m.get_or_insert (q);
+    m.remove (q);
+
+    ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor);
+  }
+}
+
 /* Run all of the selftests within this file.  */
 
 void
 hash_map_tests_c_tests ()
 {
   test_map_of_strings_to_int ();
+  test_map_of_type_with_ctor_and_dtor ();
 }
 
 } // namespace selftest
diff --git a/gcc/hash-map.h b/gcc/hash-map.h
index 588dfda04fa..71cc1dead1d 100644
--- a/gcc/hash-map.h
+++ b/gcc/hash-map.h
@@ -21,8 +21,12 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef hash_map_h
 #define hash_map_h
 
-template<typename KeyId, typename Value,
-	 typename Traits>
+/* KeyId must be a trivial (POD) type.  Value may be non-trivial
+   (non-POD).  Ctors and dtors are invoked as necessary on
+   inserted and removed elements.  On hash_map destruction all
+   elements are removed.  */
+
+template<typename KeyId, typename Value, typename Traits>
 class GTY((user)) hash_map
 {
   typedef typename Traits::key_type Key;
@@ -151,12 +155,16 @@ public:
     {
       hash_entry *e = m_table.find_slot_with_hash (k, Traits::hash (k),
 						   INSERT);
-      bool existed = !hash_entry::is_empty (*e);
-      if (!existed)
-	e->m_key = k;
+      bool ins = hash_entry::is_empty (*e);
+      if (ins)
+	{
+	  e->m_key = k;
+	  new ((void *) &e->m_value) Value (v);
+	}
+      else
+	e->m_value = v;
 
-      e->m_value = v;
-      return existed;
+      return !ins;
     }
 
   /* if the passed in key is in the map return its value otherwise NULL.  */
@@ -168,8 +176,8 @@ public:
     }
 
   /* Return a reference to the value for the passed in key, creating the entry
-     if it doesn't already exist.  If existed is not NULL then it is set to false
-     if the key was not previously in the map, and true otherwise.  */
+     if it doesn't already exist.  If existed is not NULL then it is set to
+     false if the key was not previously in the map, and true otherwise.  */
 
   Value &get_or_insert (const Key &k, bool *existed = NULL)
     {
@@ -177,7 +185,10 @@ public:
 						   INSERT);
       bool ins = Traits::is_empty (*e);
       if (ins)
-	e->m_key = k;
+	{
+	  e->m_key = k;
+	  new ((void *)&e->m_value) Value ();
+	}
 
       if (existed != NULL)
 	*existed = !ins;
diff --git a/gcc/hash-set-tests.c b/gcc/hash-set-tests.c
index e0d1d47805b..c96fe538d9f 100644
--- a/gcc/hash-set-tests.c
+++ b/gcc/hash-set-tests.c
@@ -134,12 +134,166 @@ test_set_of_strings ()
   ASSERT_EQ (2, t.elements ());
 }
 
+typedef struct hash_set_test_value_t
+{
+  static int ndefault;
+  static int ncopy;
+  static int nassign;
+  static int ndtor;
+
+  hash_set_test_value_t (int v = 1): pval (&val), val (v)
+  {
+    ++ndefault;
+  }
+
+  hash_set_test_value_t (const hash_set_test_value_t &rhs)
+    : pval (&val), val (rhs.val)
+  {
+    ++ncopy;
+  }
+
+  hash_set_test_value_t& operator= (const hash_set_test_value_t &rhs)
+    {
+     ++nassign;
+     val = rhs.val;
+     return *this;
+    }
+
+  ~hash_set_test_value_t ()
+    {
+     /* Verify that the value hasn't been corrupted.  */
+     gcc_assert (*pval > 0);
+     gcc_assert (pval == &val);
+     *pval = -3;
+     ++ndtor;
+    }
+
+  int *pval;
+  int val;
+} val_t;
+
+int val_t::ndefault;
+int val_t::ncopy;
+int val_t::nassign;
+int val_t::ndtor;
+
+struct value_hash_traits: int_hash<int, -1, -2>
+{
+  typedef int_hash<int, -1, -2> base_type;
+  typedef val_t                 value_type;
+  typedef value_type            compare_type;
+
+  static hashval_t hash (const value_type &v)
+  {
+    return base_type::hash (v.val);
+  }
+
+  static bool equal (const value_type &a, const compare_type &b)
+  {
+    return base_type::equal (a.val, b.val);
+  }
+
+  static void mark_deleted (value_type &v)
+  {
+    base_type::mark_deleted (v.val);
+  }
+
+  static void mark_empty (value_type &v)
+  {
+    base_type::mark_empty (v.val);
+  }
+
+  static bool is_deleted (const value_type &v)
+  {
+    return base_type::is_deleted (v.val);
+  }
+
+  static bool is_empty (const value_type &v)
+  {
+    return base_type::is_empty (v.val);
+  }
+
+  static void remove (value_type &v)
+  {
+    v.~value_type ();
+  }
+};
+
+static void
+test_set_of_type_with_ctor_and_dtor ()
+{
+  typedef hash_set <val_t, false, value_hash_traits> Set;
+
+  {
+    Set s;
+    (void)&s;
+  }
+
+  ASSERT_TRUE (val_t::ndefault == 0);
+  ASSERT_TRUE (val_t::ncopy == 0);
+  ASSERT_TRUE (val_t::nassign == 0);
+  ASSERT_TRUE (val_t::ndtor == 0);
+
+  {
+    Set s;
+    ASSERT_EQ (false, s.add (val_t ()));
+    ASSERT_EQ (true, 1 == s.elements ());
+  }
+
+  ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor);
+
+  {
+    Set s;
+    ASSERT_EQ (false, s.add (val_t ()));
+    ASSERT_EQ (true, s.add (val_t ()));
+    ASSERT_EQ (true, 1 == s.elements ());
+  }
+
+  ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor);
+
+  {
+    Set s;
+    val_t v1 (1), v2 (2), v3 (3);
+    int ndefault = val_t::ndefault;
+    int nassign = val_t::nassign;
+
+    ASSERT_EQ (false, s.add (v1));
+    ASSERT_EQ (true, s.contains (v1));
+    ASSERT_EQ (true, 1 == s.elements ());
+
+    ASSERT_EQ (false, s.add (v2));
+    ASSERT_EQ (true, s.contains (v2));
+    ASSERT_EQ (true, 2 == s.elements ());
+
+    ASSERT_EQ (false, s.add (v3));
+    ASSERT_EQ (true, s.contains (v3));
+    ASSERT_EQ (true, 3 == s.elements ());
+
+    ASSERT_EQ (true, s.add (v2));
+    ASSERT_EQ (true, s.contains (v2));
+    ASSERT_EQ (true, 3 == s.elements ());
+
+    s.remove (v2);
+    ASSERT_EQ (true, 2 == s.elements ());
+    s.remove (v3);
+    ASSERT_EQ (true, 1 == s.elements ());
+
+    /* Verify that no default ctors or assignment operators have
+       been called.  */
+    ASSERT_EQ (true, ndefault == val_t::ndefault);
+    ASSERT_EQ (true, nassign == val_t::nassign);
+  }
+
+  ASSERT_TRUE (val_t::ndefault + val_t::ncopy == val_t::ndtor);
+}
+
 /* Run all of the selftests within this file.  */
 
 void
 hash_set_tests_c_tests ()
 {
   test_set_of_strings ();
+  test_set_of_type_with_ctor_and_dtor ();
 }
 
 } // namespace selftest
diff --git a/gcc/hash-set.h b/gcc/hash-set.h
index d891ed78297..5eb4bd768d9 100644
--- a/gcc/hash-set.h
+++ b/gcc/hash-set.h
@@ -21,6 +21,11 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef hash_set_h
 #define hash_set_h
 
+/* KeyId must be a trivial (POD) type.  Traits::value_type may be
+   non-trivial (non-POD).  Ctors and dtors are invoked as necessary
+   on inserted and removed elements.  On hash_set destruction all
+   elements are removed.  */
+
 template<typename KeyId, bool Lazy = false,
 	 typename Traits = default_hash_traits<KeyId> >
 class hash_set
@@ -48,7 +53,7 @@ public:
       Key *e = m_table.find_slot_with_hash (k, Traits::hash (k), INSERT);
       bool existed = !Traits::is_empty (*e);
       if (!existed)
-	*e = k;
+	new (e) Key (k);
 
       return existed;
     }

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] let hash-based containers work with non-trivial types (PR 90923)
       [not found]             ` <33e05a72-ba9e-ff20-bff5-6e8ccfdbc5b2@gmail.com>
@ 2019-07-01 14:55               ` Martin Sebor
  2019-07-01 16:34                 ` Richard Biener
  0 siblings, 1 reply; 4+ messages in thread
From: Martin Sebor @ 2019-07-01 14:55 UTC (permalink / raw)
  To: Jonathan Wakely, Richard Biener; +Cc: gcc mailing list, gcc-patches

[Adding gcc-patches]

Richard, do you have any further comments or is the revised patch
good to commit?

Martin

On 6/25/19 2:30 PM, Martin Sebor wrote:
> On 6/25/19 3:53 AM, Jonathan Wakely wrote:
>> On 24/06/19 19:42 +0200, Richard Biener wrote:
>>> On Mon, Jun 24, 2019 at 4:35 PM Martin Sebor <msebor@gmail.com> wrote:
>>>>
>>>> On 6/24/19 6:11 AM, Richard Biener wrote:
>>>> > On Fri, Jun 21, 2019 at 7:17 PM Martin Sebor <msebor@gmail.com> 
>>>> wrote:
>>>> >>
>>>> >> On 6/21/19 6:06 AM, Richard Biener wrote:
>>>> >>> On Wed, Jun 19, 2019 at 5:15 AM Martin Sebor <msebor@gmail.com> 
>>>> wrote:
>>>> >>>>
>>>> >>>> Bug 90923 shows that even though GCC hash-table based containers
>>>> >>>> like hash_map can be instantiated on types with user-defined ctors
>>>> >>>> and dtors they invoke the dtors of such types without invoking
>>>> >>>> the corresponding ctors.
>>>> >>>>
>>>> >>>> It was thanks to this bug that I spent a day debugging 
>>>> "interesting"
>>>> >>>> miscompilations during GCC bootstrap (in fairness, it was that and
>>>> >>>> bug 90904 about auto_vec copy assignment/construction also being
>>>> >>>> hosed even for POD types).
>>>> >>>>
>>>> >>>> The attached patch corrects the hash_map and hash_set templates
>>>> >>>> to invoke the ctors of the elements they insert and makes them
>>>> >>>> (hopefully) safe to use with non-trivial user-defined types.
>>>> >>>
>>>> >>> Hum.  I am worried about the difference of assignment vs. 
>>>> construction
>>>> >>> in ::put()
>>>> >>>
>>>> >>> +      bool ins = hash_entry::is_empty (*e);
>>>> >>> +      if (ins)
>>>> >>> +       {
>>>> >>> +         e->m_key = k;
>>>> >>> +         new ((void *) &e->m_value) Value (v);
>>>> >>> +       }
>>>> >>> +      else
>>>> >>> +       e->m_value = v;
>>>> >>>
>>>> >>> why not invoke the dtor on the old value and then the ctor again?
>>>> >>
>>>> >> It wouldn't work for self-assignment:
>>>> >>
>>>> >>     Value &y = m.get_or_insert (key);
>>>> >>     m.put (key, y);
>>>> >>
>>>> >> The usual rule of thumb for writes into containers is to use
>>>> >> construction when creating a new element and assignment when
>>>> >> replacing the value of an existing element.
>>>> >>
>>>> >> Which reminds me that the hash containers, despite being copy-
>>>> >> constructible (at least for POD types, they aren't for user-
>>>> >> defined types), also aren't safe for assignment even for PODs.
>>>> >> I opened bug 90959 for this.  Until the assignment is fixed
>>>> >> I made it inaccessibe in the patch (I have fixed the copy
>>>> >> ctor to DTRT for non-PODs).
>>>> >>
>>>> >>> How is an empty hash_entry constructed?
>>>> >>
>>>> >> In hash_table::find_slot_with_hash simply by finding an empty
>>>> >> slot and returning a pointer to it.  The memory for the slot
>>>> >> is marked "empty" by calling the Traits::mark_empty() function.
>>>> >>
>>>> >> The full type of hash_map<void*, Value> is actually
>>>> >>
>>>> >>     hash_map<void*,
>>>> >>              Value,
>>>> >>              simple_hashmap_traits<default_hash_traits<void*>,
>>>> >>                                    Value>
>>>> >>
>>>> >> and simple_hashmap_traits delegates it to default_hash_traits
>>>> >> whose mark_empty() just clears the void*, leaving the Value
>>>> >> part uninitialized.  That makes sense because we don't want
>>>> >> to call ctors for empty entries.  I think the questions one
>>>> >> might ask if one were to extend the design are: a) what class
>>>> >> should invoke the ctor/assignment and b) should it do it
>>>> >> directly or via the traits?
>>>> >>
>>>> >>> ::remove() doesn't
>>>> >>> seem to invoke the dtor either, instead it relies on the
>>>> >>> traits::remove function?
>>>> >>
>>>> >> Yes.  There is no Traits::construct or assign or copy.  We
>>>> >> could add them but I'm not sure I see to what end (there could
>>>> >> be use cases, I just don't know enough about these classes to
>>>> >> think of any).
>>>> >>
>>>> >> Attached is an updated patch with the additional minor fixes
>>>> >> mentioned above.
>>>> >>
>>>> >> Martin
>>>> >>
>>>> >> PS I think we would get much better results by adopting
>>>> >> the properly designed and tested standard library containers
>>>> >> than by spending time trying to improve the design of these
>>>> >> legacy classes.  For simple uses that don't need to integrate
>>>> >> with the GC machinery the standard containers should be fine
>>>> >> (plus, it'd provide us with greater motivation to improve
>>>> >> them and the code GCC emits for their uses).  Unfortunately,
>>>> >> to be able to use the hash-based containers we would need to
>>>> >> upgrade to C++ 11.  Isn't it time yet?
>>>> >
>>>> > I don't think so.  I'm also not sure if C++11 on its own is desirable
>>>> > or if it should be C++14 or later at that point.  SLES 12 has GCC 4.8
>>>> > as host compiler (but also GCC 7+ optionally), SLES 15 has GCC 7.
>>>> > SLES 11 already struggles currently (GCC 4.3) but I'd no longer
>>>> > consider that important enough.
>>>> >
>>>> > Note any such change to libstdc++ containers should be complete
>>>> > and come with both code-size and compile-time and memory-usage
>>>> > measurements (both of GCC and other apps of course).
>>>>
>>>> Can I go ahead and commit the patch?
>>>
>>> I think we need to document the requirements on Value classes better.
>>>
>>> @@ -177,7 +185,10 @@ public:
>>>                                                   INSERT);
>>>       bool ins = Traits::is_empty (*e);
>>>       if (ins)
>>> -       e->m_key = k;
>>> +       {
>>> +         e->m_key = k;
>>> +         new ((void *)&e->m_value) Value ();
>>> +       }
>>>
>>> this now requires a default constructor and I always forget about
>>> differences between the different form of initializations -- for a POD,
>>> does this zero the entry?
>>>
>>> Otherwise looks OK to me - I was hoping Jonathan would chime in here.
>>
>> The patch looks good to me. I 100% agree with Martin that put() should
>> not destroy an existing element and recreate a new one. Assignment is
>> the right way to update the value.
>>
>> And Value() is the correct initialization.
>>
>> The only change I'd suggest is something to enforce the "KeyId must be
>> a trivial (POD) type" requirement:
>>
>> #if __GNUC__ >= 6 && __cplusplus >= 201103L
>> static_assert(__is_pod(KeyId), "KeyId must be a trivial (POD) type");
>> #endif
>>
>> This could actually be added for 4.7 and up (__is_pod is available
>> earlier, but __cplusplus isn't set correctly before that) but GCC 6 is
>> when we started to default to C++14. If the check only happens for
>> people bootstrapping with new versions of GCC it's still going to
>> catch misuses with non-POD types.
> 
> I've updated the comments explaining the constraints in more detail
> and added the static assert to hash_table where it covers both
> has_map and hash_set.  FWIW, I don't imagine anyone instantiating
> these containers on a non-trivial key types in GCC but having
> the assert there doesn't hurt anything.
> 
> Martin
> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] let hash-based containers work with non-trivial types (PR 90923)
  2019-07-01 14:55               ` Martin Sebor
@ 2019-07-01 16:34                 ` Richard Biener
  2019-07-01 18:34                   ` Martin Sebor
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Biener @ 2019-07-01 16:34 UTC (permalink / raw)
  To: Martin Sebor; +Cc: Jonathan Wakely, gcc mailing list, gcc-patches

On Mon, Jul 1, 2019 at 4:55 PM Martin Sebor <msebor@gmail.com> wrote:>
> [Adding gcc-patches]
>
> Richard, do you have any further comments or is the revised patch
> good to commit?

No further comments from my side - it's good to commit.

Richard.

> Martin
>
> On 6/25/19 2:30 PM, Martin Sebor wrote:
> > On 6/25/19 3:53 AM, Jonathan Wakely wrote:
> >> On 24/06/19 19:42 +0200, Richard Biener wrote:
> >>> On Mon, Jun 24, 2019 at 4:35 PM Martin Sebor <msebor@gmail.com> wrote:
> >>>>
> >>>> On 6/24/19 6:11 AM, Richard Biener wrote:
> >>>> > On Fri, Jun 21, 2019 at 7:17 PM Martin Sebor <msebor@gmail.com>
> >>>> wrote:
> >>>> >>
> >>>> >> On 6/21/19 6:06 AM, Richard Biener wrote:
> >>>> >>> On Wed, Jun 19, 2019 at 5:15 AM Martin Sebor <msebor@gmail.com>
> >>>> wrote:
> >>>> >>>>
> >>>> >>>> Bug 90923 shows that even though GCC hash-table based containers
> >>>> >>>> like hash_map can be instantiated on types with user-defined ctors
> >>>> >>>> and dtors they invoke the dtors of such types without invoking
> >>>> >>>> the corresponding ctors.
> >>>> >>>>
> >>>> >>>> It was thanks to this bug that I spent a day debugging
> >>>> "interesting"
> >>>> >>>> miscompilations during GCC bootstrap (in fairness, it was that and
> >>>> >>>> bug 90904 about auto_vec copy assignment/construction also being
> >>>> >>>> hosed even for POD types).
> >>>> >>>>
> >>>> >>>> The attached patch corrects the hash_map and hash_set templates
> >>>> >>>> to invoke the ctors of the elements they insert and makes them
> >>>> >>>> (hopefully) safe to use with non-trivial user-defined types.
> >>>> >>>
> >>>> >>> Hum.  I am worried about the difference of assignment vs.
> >>>> construction
> >>>> >>> in ::put()
> >>>> >>>
> >>>> >>> +      bool ins = hash_entry::is_empty (*e);
> >>>> >>> +      if (ins)
> >>>> >>> +       {
> >>>> >>> +         e->m_key = k;
> >>>> >>> +         new ((void *) &e->m_value) Value (v);
> >>>> >>> +       }
> >>>> >>> +      else
> >>>> >>> +       e->m_value = v;
> >>>> >>>
> >>>> >>> why not invoke the dtor on the old value and then the ctor again?
> >>>> >>
> >>>> >> It wouldn't work for self-assignment:
> >>>> >>
> >>>> >>     Value &y = m.get_or_insert (key);
> >>>> >>     m.put (key, y);
> >>>> >>
> >>>> >> The usual rule of thumb for writes into containers is to use
> >>>> >> construction when creating a new element and assignment when
> >>>> >> replacing the value of an existing element.
> >>>> >>
> >>>> >> Which reminds me that the hash containers, despite being copy-
> >>>> >> constructible (at least for POD types, they aren't for user-
> >>>> >> defined types), also aren't safe for assignment even for PODs.
> >>>> >> I opened bug 90959 for this.  Until the assignment is fixed
> >>>> >> I made it inaccessibe in the patch (I have fixed the copy
> >>>> >> ctor to DTRT for non-PODs).
> >>>> >>
> >>>> >>> How is an empty hash_entry constructed?
> >>>> >>
> >>>> >> In hash_table::find_slot_with_hash simply by finding an empty
> >>>> >> slot and returning a pointer to it.  The memory for the slot
> >>>> >> is marked "empty" by calling the Traits::mark_empty() function.
> >>>> >>
> >>>> >> The full type of hash_map<void*, Value> is actually
> >>>> >>
> >>>> >>     hash_map<void*,
> >>>> >>              Value,
> >>>> >>              simple_hashmap_traits<default_hash_traits<void*>,
> >>>> >>                                    Value>
> >>>> >>
> >>>> >> and simple_hashmap_traits delegates it to default_hash_traits
> >>>> >> whose mark_empty() just clears the void*, leaving the Value
> >>>> >> part uninitialized.  That makes sense because we don't want
> >>>> >> to call ctors for empty entries.  I think the questions one
> >>>> >> might ask if one were to extend the design are: a) what class
> >>>> >> should invoke the ctor/assignment and b) should it do it
> >>>> >> directly or via the traits?
> >>>> >>
> >>>> >>> ::remove() doesn't
> >>>> >>> seem to invoke the dtor either, instead it relies on the
> >>>> >>> traits::remove function?
> >>>> >>
> >>>> >> Yes.  There is no Traits::construct or assign or copy.  We
> >>>> >> could add them but I'm not sure I see to what end (there could
> >>>> >> be use cases, I just don't know enough about these classes to
> >>>> >> think of any).
> >>>> >>
> >>>> >> Attached is an updated patch with the additional minor fixes
> >>>> >> mentioned above.
> >>>> >>
> >>>> >> Martin
> >>>> >>
> >>>> >> PS I think we would get much better results by adopting
> >>>> >> the properly designed and tested standard library containers
> >>>> >> than by spending time trying to improve the design of these
> >>>> >> legacy classes.  For simple uses that don't need to integrate
> >>>> >> with the GC machinery the standard containers should be fine
> >>>> >> (plus, it'd provide us with greater motivation to improve
> >>>> >> them and the code GCC emits for their uses).  Unfortunately,
> >>>> >> to be able to use the hash-based containers we would need to
> >>>> >> upgrade to C++ 11.  Isn't it time yet?
> >>>> >
> >>>> > I don't think so.  I'm also not sure if C++11 on its own is desirable
> >>>> > or if it should be C++14 or later at that point.  SLES 12 has GCC 4.8
> >>>> > as host compiler (but also GCC 7+ optionally), SLES 15 has GCC 7.
> >>>> > SLES 11 already struggles currently (GCC 4.3) but I'd no longer
> >>>> > consider that important enough.
> >>>> >
> >>>> > Note any such change to libstdc++ containers should be complete
> >>>> > and come with both code-size and compile-time and memory-usage
> >>>> > measurements (both of GCC and other apps of course).
> >>>>
> >>>> Can I go ahead and commit the patch?
> >>>
> >>> I think we need to document the requirements on Value classes better.
> >>>
> >>> @@ -177,7 +185,10 @@ public:
> >>>                                                   INSERT);
> >>>       bool ins = Traits::is_empty (*e);
> >>>       if (ins)
> >>> -       e->m_key = k;
> >>> +       {
> >>> +         e->m_key = k;
> >>> +         new ((void *)&e->m_value) Value ();
> >>> +       }
> >>>
> >>> this now requires a default constructor and I always forget about
> >>> differences between the different form of initializations -- for a POD,
> >>> does this zero the entry?
> >>>
> >>> Otherwise looks OK to me - I was hoping Jonathan would chime in here.
> >>
> >> The patch looks good to me. I 100% agree with Martin that put() should
> >> not destroy an existing element and recreate a new one. Assignment is
> >> the right way to update the value.
> >>
> >> And Value() is the correct initialization.
> >>
> >> The only change I'd suggest is something to enforce the "KeyId must be
> >> a trivial (POD) type" requirement:
> >>
> >> #if __GNUC__ >= 6 && __cplusplus >= 201103L
> >> static_assert(__is_pod(KeyId), "KeyId must be a trivial (POD) type");
> >> #endif
> >>
> >> This could actually be added for 4.7 and up (__is_pod is available
> >> earlier, but __cplusplus isn't set correctly before that) but GCC 6 is
> >> when we started to default to C++14. If the check only happens for
> >> people bootstrapping with new versions of GCC it's still going to
> >> catch misuses with non-POD types.
> >
> > I've updated the comments explaining the constraints in more detail
> > and added the static assert to hash_table where it covers both
> > has_map and hash_set.  FWIW, I don't imagine anyone instantiating
> > these containers on a non-trivial key types in GCC but having
> > the assert there doesn't hurt anything.
> >
> > Martin
> >
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] let hash-based containers work with non-trivial types (PR 90923)
  2019-07-01 16:34                 ` Richard Biener
@ 2019-07-01 18:34                   ` Martin Sebor
  0 siblings, 0 replies; 4+ messages in thread
From: Martin Sebor @ 2019-07-01 18:34 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jonathan Wakely, gcc mailing list, gcc-patches

On 7/1/19 10:33 AM, Richard Biener wrote:
> On Mon, Jul 1, 2019 at 4:55 PM Martin Sebor <msebor@gmail.com>
> wrote:>
>> [Adding gcc-patches]
>> 
>> Richard, do you have any further comments or is the revised patch 
>> good to commit?
> 
> No further comments from my side - it's good to commit.

After running a full bootstrap with the patch with the static_assert
I found that the I didn't fully understand the KeyId type/compare_type
requirements.  Like value_type, this type too can be a non-POD type.
It just needs a suitable Traits (AKA Descriptor) class.

I've updated the comments to reflect that and removed
the static_assert and checked in the original version of the change
with better comments in r272893.

Sorry about that hiccup.

Martin

> 
> Richard.
> 
>> Martin
>> 
>> On 6/25/19 2:30 PM, Martin Sebor wrote:
>>> On 6/25/19 3:53 AM, Jonathan Wakely wrote:
>>>> On 24/06/19 19:42 +0200, Richard Biener wrote:
>>>>> On Mon, Jun 24, 2019 at 4:35 PM Martin Sebor
>>>>> <msebor@gmail.com> wrote:
>>>>>> 
>>>>>> On 6/24/19 6:11 AM, Richard Biener wrote:
>>>>>>> On Fri, Jun 21, 2019 at 7:17 PM Martin Sebor
>>>>>>> <msebor@gmail.com>
>>>>>> wrote:
>>>>>>>> 
>>>>>>>> On 6/21/19 6:06 AM, Richard Biener wrote:
>>>>>>>>> On Wed, Jun 19, 2019 at 5:15 AM Martin Sebor
>>>>>>>>> <msebor@gmail.com>
>>>>>> wrote:
>>>>>>>>>> 
>>>>>>>>>> Bug 90923 shows that even though GCC hash-table
>>>>>>>>>> based containers like hash_map can be instantiated
>>>>>>>>>> on types with user-defined ctors and dtors they
>>>>>>>>>> invoke the dtors of such types without invoking the
>>>>>>>>>> corresponding ctors.
>>>>>>>>>> 
>>>>>>>>>> It was thanks to this bug that I spent a day
>>>>>>>>>> debugging
>>>>>> "interesting"
>>>>>>>>>> miscompilations during GCC bootstrap (in fairness,
>>>>>>>>>> it was that and bug 90904 about auto_vec copy
>>>>>>>>>> assignment/construction also being hosed even for
>>>>>>>>>> POD types).
>>>>>>>>>> 
>>>>>>>>>> The attached patch corrects the hash_map and
>>>>>>>>>> hash_set templates to invoke the ctors of the
>>>>>>>>>> elements they insert and makes them (hopefully)
>>>>>>>>>> safe to use with non-trivial user-defined types.
>>>>>>>>> 
>>>>>>>>> Hum.  I am worried about the difference of assignment
>>>>>>>>> vs.
>>>>>> construction
>>>>>>>>> in ::put()
>>>>>>>>> 
>>>>>>>>> +      bool ins = hash_entry::is_empty (*e); +
>>>>>>>>> if (ins) +       { +         e->m_key = k; +
>>>>>>>>> new ((void *) &e->m_value) Value (v); +       } +
>>>>>>>>> else +       e->m_value = v;
>>>>>>>>> 
>>>>>>>>> why not invoke the dtor on the old value and then the
>>>>>>>>> ctor again?
>>>>>>>> 
>>>>>>>> It wouldn't work for self-assignment:
>>>>>>>> 
>>>>>>>> Value &y = m.get_or_insert (key); m.put (key, y);
>>>>>>>> 
>>>>>>>> The usual rule of thumb for writes into containers is
>>>>>>>> to use construction when creating a new element and
>>>>>>>> assignment when replacing the value of an existing
>>>>>>>> element.
>>>>>>>> 
>>>>>>>> Which reminds me that the hash containers, despite
>>>>>>>> being copy- constructible (at least for POD types, they
>>>>>>>> aren't for user- defined types), also aren't safe for
>>>>>>>> assignment even for PODs. I opened bug 90959 for this.
>>>>>>>> Until the assignment is fixed I made it inaccessibe in
>>>>>>>> the patch (I have fixed the copy ctor to DTRT for
>>>>>>>> non-PODs).
>>>>>>>> 
>>>>>>>>> How is an empty hash_entry constructed?
>>>>>>>> 
>>>>>>>> In hash_table::find_slot_with_hash simply by finding an
>>>>>>>> empty slot and returning a pointer to it.  The memory
>>>>>>>> for the slot is marked "empty" by calling the
>>>>>>>> Traits::mark_empty() function.
>>>>>>>> 
>>>>>>>> The full type of hash_map<void*, Value> is actually
>>>>>>>> 
>>>>>>>> hash_map<void*, Value, 
>>>>>>>> simple_hashmap_traits<default_hash_traits<void*>, 
>>>>>>>> Value>
>>>>>>>> 
>>>>>>>> and simple_hashmap_traits delegates it to
>>>>>>>> default_hash_traits whose mark_empty() just clears the
>>>>>>>> void*, leaving the Value part uninitialized.  That
>>>>>>>> makes sense because we don't want to call ctors for
>>>>>>>> empty entries.  I think the questions one might ask if
>>>>>>>> one were to extend the design are: a) what class should
>>>>>>>> invoke the ctor/assignment and b) should it do it 
>>>>>>>> directly or via the traits?
>>>>>>>> 
>>>>>>>>> ::remove() doesn't seem to invoke the dtor either,
>>>>>>>>> instead it relies on the traits::remove function?
>>>>>>>> 
>>>>>>>> Yes.  There is no Traits::construct or assign or copy.
>>>>>>>> We could add them but I'm not sure I see to what end
>>>>>>>> (there could be use cases, I just don't know enough
>>>>>>>> about these classes to think of any).
>>>>>>>> 
>>>>>>>> Attached is an updated patch with the additional minor
>>>>>>>> fixes mentioned above.
>>>>>>>> 
>>>>>>>> Martin
>>>>>>>> 
>>>>>>>> PS I think we would get much better results by
>>>>>>>> adopting the properly designed and tested standard
>>>>>>>> library containers than by spending time trying to
>>>>>>>> improve the design of these legacy classes.  For simple
>>>>>>>> uses that don't need to integrate with the GC machinery
>>>>>>>> the standard containers should be fine (plus, it'd
>>>>>>>> provide us with greater motivation to improve them and
>>>>>>>> the code GCC emits for their uses).  Unfortunately, to
>>>>>>>> be able to use the hash-based containers we would need
>>>>>>>> to upgrade to C++ 11.  Isn't it time yet?
>>>>>>> 
>>>>>>> I don't think so.  I'm also not sure if C++11 on its own
>>>>>>> is desirable or if it should be C++14 or later at that
>>>>>>> point.  SLES 12 has GCC 4.8 as host compiler (but also
>>>>>>> GCC 7+ optionally), SLES 15 has GCC 7. SLES 11 already
>>>>>>> struggles currently (GCC 4.3) but I'd no longer consider
>>>>>>> that important enough.
>>>>>>> 
>>>>>>> Note any such change to libstdc++ containers should be
>>>>>>> complete and come with both code-size and compile-time
>>>>>>> and memory-usage measurements (both of GCC and other apps
>>>>>>> of course).
>>>>>> 
>>>>>> Can I go ahead and commit the patch?
>>>>> 
>>>>> I think we need to document the requirements on Value classes
>>>>> better.
>>>>> 
>>>>> @@ -177,7 +185,10 @@ public: INSERT); bool ins =
>>>>> Traits::is_empty (*e); if (ins) -       e->m_key = k; +
>>>>> { +         e->m_key = k; +         new ((void *)&e->m_value)
>>>>> Value (); +       }
>>>>> 
>>>>> this now requires a default constructor and I always forget
>>>>> about differences between the different form of
>>>>> initializations -- for a POD, does this zero the entry?
>>>>> 
>>>>> Otherwise looks OK to me - I was hoping Jonathan would chime
>>>>> in here.
>>>> 
>>>> The patch looks good to me. I 100% agree with Martin that put()
>>>> should not destroy an existing element and recreate a new one.
>>>> Assignment is the right way to update the value.
>>>> 
>>>> And Value() is the correct initialization.
>>>> 
>>>> The only change I'd suggest is something to enforce the "KeyId
>>>> must be a trivial (POD) type" requirement:
>>>> 
>>>> #if __GNUC__ >= 6 && __cplusplus >= 201103L 
>>>> static_assert(__is_pod(KeyId), "KeyId must be a trivial (POD)
>>>> type"); #endif
>>>> 
>>>> This could actually be added for 4.7 and up (__is_pod is
>>>> available earlier, but __cplusplus isn't set correctly before
>>>> that) but GCC 6 is when we started to default to C++14. If the
>>>> check only happens for people bootstrapping with new versions
>>>> of GCC it's still going to catch misuses with non-POD types.
>>> 
>>> I've updated the comments explaining the constraints in more
>>> detail and added the static assert to hash_table where it covers
>>> both has_map and hash_set.  FWIW, I don't imagine anyone
>>> instantiating these containers on a non-trivial key types in GCC
>>> but having the assert there doesn't hurt anything.
>>> 
>>> Martin
>>> 
>> 

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2019-07-01 18:34 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <b30b052e-97d1-ce1d-d4c2-816b0a05f2c1@gmail.com>
2019-06-19  3:17 ` [PATCH] let hash-based containers work with non-trivial types (PR 90923) Martin Sebor
     [not found] ` <CAFiYyc1scx6AxPi+ME77GB_5aNJcKrQPuugBts+7E2gnkxzPCA@mail.gmail.com>
     [not found]   ` <ab514c17-aa5f-0523-0330-c2c3681febd7@gmail.com>
     [not found]     ` <CAFiYyc213JPKKpQdFNNF0VpVzxVc4DkoPLkq6LTFX2W-Phf-Jg@mail.gmail.com>
     [not found]       ` <973790d7-7564-2f5a-100f-47bd930413c7@gmail.com>
     [not found]         ` <CAFiYyc3=3=ZKq_JCtAQVqNb8y+nKqfr8+hC72QcXh73w9oC7wQ@mail.gmail.com>
     [not found]           ` <20190625095328.GA30349@redhat.com>
     [not found]             ` <33e05a72-ba9e-ff20-bff5-6e8ccfdbc5b2@gmail.com>
2019-07-01 14:55               ` Martin Sebor
2019-07-01 16:34                 ` Richard Biener
2019-07-01 18:34                   ` Martin Sebor

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).