Date: Fri, 23 Jan 2004 06:01:00 -0000
Message-ID: <20040123060138.21908.qmail@sources.redhat.com>
From: "rittle at latour dot rsch dot comm dot mot dot com"
To: gcc-bugs@gcc.gnu.org
In-Reply-To: <20040123001452.13823.devison@pacificit.co.nz>
References: <20040123001452.13823.devison@pacificit.co.nz>
Reply-To: gcc-bugzilla@gcc.gnu.org
Subject: [Bug libstdc++/13823] Significant performance issue with std::map on multiple threads on dual processor - possibly default allocator

------- Additional Comments From rittle at latour dot rsch dot comm dot mot dot com  2004-01-23 06:01 -------
Subject: Re: Significant performance issue with std::map on multiple threads on dual processor - possibly default allocator

> I obtained the latest ext/mt_allocator.h file from cvs, and reran the test
> using it.  Otherwise I'm still using the gcc 3.3.2 headers.  BTW, I needed
> to make one addition to make it compile:
> namespace __gnu_cxx { using std::__throw_bad_alloc; }

Please apply this patch and rerun your test (it would also be great if you
could tell us whether your real application code sees major improvement
using this allocator with an MP machine; I know that might be a little
more work)...

[This patch is from Stefan, the original author of mt_allocator.h.]

Index: include/ext/mt_allocator.h
===================================================================
RCS file: /cvs/gcc/gcc/libstdc++-v3/include/ext/mt_allocator.h,v
retrieving revision 1.9
diff -c -r1.9 mt_allocator.h
*** include/ext/mt_allocator.h	20 Jan 2004 06:35:21 -0000	1.9
--- include/ext/mt_allocator.h	23 Jan 2004 05:53:48 -0000
***************
*** 77,95 ****
        __mt_alloc() throw()
        {
!         // XXX
        }

        __mt_alloc(const __mt_alloc&) throw()
        {
!         // XXX
        }

        template<typename _Tp1>
          __mt_alloc(const __mt_alloc<_Tp1>&) throw()
        {
!         // XXX
!       }

        ~__mt_alloc() throw() { }
--- 77,95 ----
        __mt_alloc() throw()
        {
!         // XXX
        }

        __mt_alloc(const __mt_alloc&) throw()
        {
!         // XXX
        }

        template<typename _Tp1>
          __mt_alloc(const __mt_alloc<_Tp1>&) throw()
        {
!         // XXX
!       }

        ~__mt_alloc() throw() { }
***************
*** 237,243 ****
              __throw_bad_alloc();
            return static_cast<_Tp*>(__ret);
          }
!
        /*
         * Although the test in __gthread_once() would suffice, we
         * wrap test of the once condition in our own unlocked
--- 237,243 ----
              __throw_bad_alloc();
            return static_cast<_Tp*>(__ret);
          }
!
        /*
         * Although the test in __gthread_once() would suffice, we
         * wrap test of the once condition in our own unlocked
***************
*** 269,275 ****
        size_t thread_id = 0;
  #endif

!       block_record* block;

        /*
         * Find out if we have blocks on our freelist.
--- 269,275 ----
        size_t thread_id = 0;
  #endif

!       block_record* block = NULL;

        /*
         * Find out if we have blocks on our freelist.
***************
*** 280,288 ****
        {
          /*
           * Are we using threads?
!          * - Yes, lock and check if there are free blocks on the global
!          *   list (and if not add new ones), get the first one
!          *   and change owner.
           * - No, all operations are made directly to global pool 0
           *   no need to lock or change ownership but check for free
           *   blocks on global list (and if not add new ones) and
--- 280,290 ----
        {
          /*
           * Are we using threads?
!          * - Yes, check if there are free blocks on the global
!          *   list. If so, grab up to block_count blocks in one
!          *   lock and change ownership. If the global list is
!          *   empty, we allocate a new chunk and add those blocks
!          *   directly to our own freelist (with us as owner).
           * - No, all operations are made directly to global pool 0
           *   no need to lock or change ownership but check for free
           *   blocks on global list (and if not add new ones) and
***************
*** 291,347 ****
  #ifdef __GTHREADS
        if (__gthread_active_p())
          {
            __gthread_mutex_lock(_S_bin[bin].mutex);

            if (_S_bin[bin].first[0] == NULL)
              {
!               _S_bin[bin].first[0] =
!                 (block_record*)malloc(_S_chunk_size);
!               if (!_S_bin[bin].first[0])
!                 {
!                   __gthread_mutex_unlock(_S_bin[bin].mutex);
!                   __throw_bad_alloc();
!                 }
!               size_t bin_t = 1 << bin;
!               size_t block_count =
!                 _S_chunk_size /(bin_t + sizeof(block_record));
!               _S_bin[bin].free[0] = block_count;

                block_count--;
!               block = _S_bin[bin].first[0];

                while (block_count > 0)
                  {
                    block->next = (block_record*)((char*)block +
                                  (bin_t + sizeof(block_record)));
                    block = block->next;
                    block_count--;
                  }

                block->next = NULL;
!               _S_bin[bin].last[0] = block;
              }

!           block = _S_bin[bin].first[0];
!           /*
!            * Remove from list and count down the available counter on
!            * global pool 0.
!            */
!           _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
!           _S_bin[bin].free[0]--;
!           __gthread_mutex_unlock(_S_bin[bin].mutex);

            /*
!            * Now that we have removed the block from the global
!            * freelist we can change owner and update the used
!            * counter for this thread without locking.
             */
!           block->thread_id = thread_id;

            _S_bin[bin].used[thread_id]++;
          }
        else
--- 293,375 ----
  #ifdef __GTHREADS
        if (__gthread_active_p())
          {
+           size_t bin_t = 1 << bin;
+           size_t block_count =
+             _S_chunk_size /(bin_t + sizeof(block_record));
+
            __gthread_mutex_lock(_S_bin[bin].mutex);

            if (_S_bin[bin].first[0] == NULL)
              {
!               /*
!                * No need to hold the lock when we are adding a
!                * whole chunk to our own list
!                */
!               __gthread_mutex_unlock(_S_bin[bin].mutex);
!               _S_bin[bin].first[thread_id] =
!                 (block_record*)malloc(_S_chunk_size);
!               if (!_S_bin[bin].first[thread_id])
!                 __throw_bad_alloc();
!               _S_bin[bin].free[thread_id] = block_count;

                block_count--;
!               block = _S_bin[bin].first[thread_id];

                while (block_count > 0)
                  {
                    block->next = (block_record*)((char*)block +
                                  (bin_t + sizeof(block_record)));
+                   block->thread_id = thread_id;
                    block = block->next;
                    block_count--;
                  }

                block->next = NULL;
!               block->thread_id = thread_id;
!               _S_bin[bin].last[thread_id] = block;
              }
+           else
+             {
+               size_t global_count = 0;
!               while( _S_bin[bin].first[0] != NULL &&
!                      global_count < block_count )
!                 {
!                   block = _S_bin[bin].first[0];
!                   if (_S_bin[bin].first[thread_id] == NULL)
!                     _S_bin[bin].first[thread_id] = block;
!                   else
!                     _S_bin[bin].last[thread_id]->next = block;
!                   _S_bin[bin].last[thread_id] = block;
!
!                   block->thread_id = thread_id;
!
!                   _S_bin[bin].free[thread_id]++;
!
!                   _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
!
!                   global_count++;
!                 }
!
!               block->next = NULL;
!
!               __gthread_mutex_unlock(_S_bin[bin].mutex);
!             }

            /*
!            * Return the first newly added block in our list and
!            * update the counters
             */
!           block = _S_bin[bin].first[thread_id];
!           _S_bin[bin].first[thread_id] =
!             _S_bin[bin].first[thread_id]->next;
!
!           _S_bin[bin].free[thread_id]--;

            _S_bin[bin].used[thread_id]++;
          }
        else
***************
*** 354,362 ****
                size_t bin_t = 1 << bin;
                size_t block_count =
!                 _S_chunk_size / (bin_t + sizeof(block_record));
!
!               _S_bin[bin].free[0] = block_count;

                block_count--;

                block = _S_bin[bin].first[0];
--- 382,388 ----
                size_t bin_t = 1 << bin;
                size_t block_count =
!                 _S_chunk_size / (bin_t + sizeof(block_record));

                block_count--;

                block = _S_bin[bin].first[0];
***************
*** 375,386 ****
            block = _S_bin[bin].first[0];

            /*
!            * Remove from list and count down the available counter on
!            * global pool 0 and increase it's used counter.
             */
            _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
-           _S_bin[bin].free[0]--;
-           _S_bin[bin].used[0]++;
          }
        }
      else
--- 401,409 ----
            block = _S_bin[bin].first[0];

            /*
!            * Remove from list
             */
            _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
          }
        }
      else
***************
*** 392,399 ****
          block = _S_bin[bin].first[thread_id];
          _S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;

!         _S_bin[bin].free[thread_id]--;
!         _S_bin[bin].used[thread_id]++;
        }

      return static_cast<_Tp*>(static_cast<void*>((char*)block + sizeof(block_record)));
--- 415,428 ----
          block = _S_bin[bin].first[thread_id];
          _S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;

!
! #ifdef __GTHREADS
!         if (__gthread_active_p())
!           {
!             _S_bin[bin].free[thread_id]--;
!             _S_bin[bin].used[thread_id]++;
!           }
! #endif
        }

      return static_cast<_Tp*>(static_cast<void*>((char*)block + sizeof(block_record)));
***************
*** 424,430 ****
  #endif

      block_record* block = (block_record*)((char*)__p
!                                           - sizeof(block_record));

      /*
       * This block will always be at the back of a list and thus
--- 453,459 ----
  #endif

      block_record* block = (block_record*)((char*)__p
!                                           - sizeof(block_record));

      /*
       * This block will always be at the back of a list and thus
***************
*** 465,471 ****
                _S_bin[bin].first[thread_id] =
                  _S_bin[bin].first[thread_id]->next;

-               _S_bin[bin].free[0]++;
                _S_bin[bin].free[thread_id]--;

                remove--;
--- 494,499 ----
***************
*** 509,517 ****
              _S_bin[bin].last[0]->next = block;
              _S_bin[bin].last[0] = block;
-
-             _S_bin[bin].free[0]++;
-             _S_bin[bin].used[0]--;
            }
        }
    };
--- 537,542 ----
***************
*** 564,570 ****
  #ifdef __GTHREADS
      if (__gthread_active_p())
        {
!         _S_thread_freelist_first =
            (thread_record*)malloc(sizeof(thread_record) * _S_max_threads);

          if (!_S_thread_freelist_first)
--- 589,595 ----
  #ifdef __GTHREADS
      if (__gthread_active_p())
        {
!         _S_thread_freelist_first =
            (thread_record*)malloc(sizeof(thread_record) * _S_max_threads);

          if (!_S_thread_freelist_first)
***************
*** 578,584 ****
          for (i = 1; i < _S_max_threads; i++)
            {
              _S_thread_freelist_first[i - 1].next =
!               &_S_thread_freelist_first[i];

              _S_thread_freelist_first[i - 1].id = i;
            }
--- 603,609 ----
          for (i = 1; i < _S_max_threads; i++)
            {
              _S_thread_freelist_first[i - 1].next =
!               &_S_thread_freelist_first[i];

              _S_thread_freelist_first[i - 1].id = i;
            }
***************
*** 605,656 ****
      if (!_S_bin)
        __throw_bad_alloc();

!     for (size_t bin = 0; bin < _S_no_of_bins; bin++)
!       {
!         std::size_t __n = _S_max_threads + 1;

          _S_bin[bin].first = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].first)
            __throw_bad_alloc();

          _S_bin[bin].last = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].last)
            __throw_bad_alloc();

!         _S_bin[bin].free = (size_t*) malloc(sizeof(size_t) * __n);

!         if (!_S_bin[bin].free)
!           __throw_bad_alloc();

!         _S_bin[bin].used = (size_t*) malloc(sizeof(size_t) * __n);

!         if (!_S_bin[bin].used)
!           __throw_bad_alloc();

! #ifdef __GTHREADS
!         _S_bin[bin].mutex =(__gthread_mutex_t*) malloc(sizeof(__gthread_mutex_t));

  #ifdef __GTHREAD_MUTEX_INIT
!         {
!           // Do not copy a POSIX/gthr mutex once in use.
!           __gthread_mutex_t __tmp = __GTHREAD_MUTEX_INIT;
!           *_S_bin[bin].mutex = __tmp;
!         }
  #else
!         { __GTHREAD_MUTEX_INIT_FUNCTION (_S_bin[bin].mutex); }
  #endif
  #endif

!         for (size_t thread = 0; thread <= _S_max_threads; thread++)
            {
              _S_bin[bin].first[thread] = NULL;
              _S_bin[bin].last[thread] = NULL;
!             _S_bin[bin].free[thread] = 0;
!             _S_bin[bin].used[thread] = 0;
            }
        }
--- 630,694 ----
      if (!_S_bin)
        __throw_bad_alloc();

!     std::size_t __n = 1;

+ #ifdef __GTHREADS
+     if (__gthread_active_p())
+       __n = _S_max_threads + 1;
+ #endif
+
+     for (size_t bin = 0; bin < _S_no_of_bins; bin++)
+       {
          _S_bin[bin].first = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].first)
            __throw_bad_alloc();

          _S_bin[bin].last = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].last)
            __throw_bad_alloc();

! #ifdef __GTHREADS
!         if (__gthread_active_p())
!           {
!             _S_bin[bin].free = (size_t*) malloc(sizeof(size_t) * __n);

!             if (!_S_bin[bin].free)
!               __throw_bad_alloc();

!             _S_bin[bin].used = (size_t*) malloc(sizeof(size_t) * __n);

!             if (!_S_bin[bin].used)
!               __throw_bad_alloc();

!             _S_bin[bin].mutex =(__gthread_mutex_t*) malloc(sizeof(__gthread_mutex_t));

  #ifdef __GTHREAD_MUTEX_INIT
!             {
!               // Do not copy a POSIX/gthr mutex once in use.
!               __gthread_mutex_t __tmp = __GTHREAD_MUTEX_INIT;
!               *_S_bin[bin].mutex = __tmp;
!             }
  #else
!             { __GTHREAD_MUTEX_INIT_FUNCTION (_S_bin[bin].mutex); }
  #endif
+           }
  #endif

!         for (size_t thread = 0; thread < __n; thread++)
            {
              _S_bin[bin].first[thread] = NULL;
              _S_bin[bin].last[thread] = NULL;
! #ifdef __GTHREADS
!             if (__gthread_active_p())
!               {
!                 _S_bin[bin].free[thread] = 0;
!                 _S_bin[bin].used[thread] = 0;
!               }
! #endif
            }
        }
***************
*** 783,788 ****
--- 821,838 ----
    template<typename _Tp> typename __mt_alloc<_Tp>::bin_record* volatile
    __mt_alloc<_Tp>::_S_bin = NULL;
+
+   template<typename _Tp>
+     inline bool
+     operator==(const __mt_alloc<_Tp>&,
+                const __mt_alloc<_Tp>&)
+     { return true; }
+
+   template<typename _Tp>
+     inline bool
+     operator!=(const __mt_alloc<_Tp>&,
+                const __mt_alloc<_Tp>&)
+     { return false; }
  } // namespace __gnu_cxx

  #endif

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13823
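[Editor's sketch, not part of the original message.]  For anyone reproducing
this, here is a minimal, self-contained sketch of how a std::map can be
pointed at __gnu_cxx::__mt_alloc so that a two-thread run actually exercises
the patched allocator.  It is not the reporter's test program (that source is
not included in this message); the map contents, iteration counts and the
pthread harness are illustrative assumptions only.

    // Minimal sketch (assumed test harness, not the reporter's code): two
    // threads each populate a private std::map whose nodes are allocated
    // through __gnu_cxx::__mt_alloc instead of the default allocator.
    #include <ext/mt_allocator.h>   // GCC-specific extension header
    #include <map>
    #include <functional>
    #include <utility>
    #include <pthread.h>
    #include <cstdio>

    // Workaround mentioned in the report when combining the CVS
    // mt_allocator.h with the gcc 3.3.2 headers; uncomment only if
    // __throw_bad_alloc is reported as undeclared.
    // namespace __gnu_cxx { using std::__throw_bad_alloc; }

    typedef std::map<int, int, std::less<int>,
                     __gnu_cxx::__mt_alloc<std::pair<const int, int> > > mt_map;

    extern "C" void*
    thread_main(void*)
    {
      // Each thread hammers its own map: with a single global freelist the
      // node allocations serialize on one mutex, with per-thread freelists
      // they mostly should not.
      mt_map m;
      for (int pass = 0; pass < 100; ++pass)
        {
          for (int i = 0; i < 10000; ++i)
            m[i] = i;
          m.clear();
        }
      return 0;
    }

    int
    main()
    {
      pthread_t t1, t2;
      pthread_create(&t1, 0, thread_main, 0);
      pthread_create(&t2, 0, thread_main, 0);
      pthread_join(t1, 0);
      pthread_join(t2, 0);
      std::printf("done\n");
      return 0;
    }

Build with something like "g++ -O2 -pthread"; timing the run before and after
the patch on an MP machine should show whether the per-thread freelist change
helps.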