Date: Fri, 23 Jan 2004 06:01:00 -0000
Message-ID: <20040123060138.21908.qmail@sources.redhat.com>
From: "rittle at latour dot rsch dot comm dot mot dot com"
To: gcc-bugs@gcc.gnu.org
In-Reply-To: <20040123001452.13823.devison@pacificit.co.nz>
References: <20040123001452.13823.devison@pacificit.co.nz>
Reply-To: gcc-bugzilla@gcc.gnu.org
Subject: [Bug libstdc++/13823] Significant performance issue with std::map on multiple threads on dual processor - possibly default allocator

------- Additional Comments From rittle at latour dot rsch dot comm dot mot dot com  2004-01-23 06:01 -------
Subject: Re: Significant performance issue with std::map on multiple threads on dual processor - possibly default allocator

> I obtained the latest ext/mt_allocator.h file from cvs, and reran the test
> using it.  Otherwise I'm still using the gcc 3.3.2 headers.  BTW, I needed
> to make one addition to make it compile:
> namespace __gnu_cxx { using std::__throw_bad_alloc; }

Please apply this patch and rerun your test (it would also be great if you
could tell us whether your real application code sees major improvement
using this allocator with an MP machine; I know that might be a little
more work)...

[This patch is from Stefan, the original author of mt_allocator.h.]

Index: include/ext/mt_allocator.h
===================================================================
RCS file: /cvs/gcc/gcc/libstdc++-v3/include/ext/mt_allocator.h,v
retrieving revision 1.9
diff -c -r1.9 mt_allocator.h
*** include/ext/mt_allocator.h	20 Jan 2004 06:35:21 -0000	1.9
--- include/ext/mt_allocator.h	23 Jan 2004 05:53:48 -0000
***************
*** 77,95 ****
        __mt_alloc() throw()
        {
!         // XXX
        }

        __mt_alloc(const __mt_alloc&) throw()
        {
!         // XXX
        }

        template<typename _Tp1>
          __mt_alloc(const __mt_alloc<_Tp1>&) throw()
        {
!         // XXX
!       }

        ~__mt_alloc() throw() { }
--- 77,95 ----
        __mt_alloc() throw()
        {
!         // XXX
        }

        __mt_alloc(const __mt_alloc&) throw()
        {
!         // XXX
        }

        template<typename _Tp1>
          __mt_alloc(const __mt_alloc<_Tp1>&) throw()
        {
!         // XXX
!       }

        ~__mt_alloc() throw() { }
***************
*** 237,243 ****
              __throw_bad_alloc();
            return static_cast<_Tp*>(__ret);
          }
!
        /*
         * Although the test in __gthread_once() would suffice, we
         * wrap test of the once condition in our own unlocked
--- 237,243 ----
              __throw_bad_alloc();
            return static_cast<_Tp*>(__ret);
          }
!
        /*
         * Although the test in __gthread_once() would suffice, we
         * wrap test of the once condition in our own unlocked
***************
*** 269,275 ****
        size_t thread_id = 0;
  #endif

!       block_record* block;

        /*
         * Find out if we have blocks on our freelist.
--- 269,275 ----
        size_t thread_id = 0;
  #endif

!       block_record* block = NULL;

        /*
         * Find out if we have blocks on our freelist.
***************
*** 280,288 ****
        {
          /*
           * Are we using threads?
!          * - Yes, lock and check if there are free blocks on the global
!          *   list (and if not add new ones), get the first one
!          *   and change owner.
           * - No, all operations are made directly to global pool 0
           *   no need to lock or change ownership but check for free
           *   blocks on global list (and if not add new ones) and
--- 280,290 ----
        {
          /*
           * Are we using threads?
!          * - Yes, check if there are free blocks on the global
!          *   list. If so, grab up to block_count blocks in one
!          *   lock and change ownership. If the global list is
!          *   empty, we allocate a new chunk and add those blocks
!          *   directly to our own freelist (with us as owner).
           * - No, all operations are made directly to global pool 0
           *   no need to lock or change ownership but check for free
           *   blocks on global list (and if not add new ones) and
***************
*** 291,347 ****
  #ifdef __GTHREADS
        if (__gthread_active_p())
          {
            __gthread_mutex_lock(_S_bin[bin].mutex);

            if (_S_bin[bin].first[0] == NULL)
              {
!               _S_bin[bin].first[0] =
!                 (block_record*)malloc(_S_chunk_size);
!               if (!_S_bin[bin].first[0])
!                 {
!                   __gthread_mutex_unlock(_S_bin[bin].mutex);
!                   __throw_bad_alloc();
!                 }
!               size_t bin_t = 1 << bin;
!               size_t block_count =
!                 _S_chunk_size /(bin_t + sizeof(block_record));
!               _S_bin[bin].free[0] = block_count;

                block_count--;
!               block = _S_bin[bin].first[0];

                while (block_count > 0)
                  {
                    block->next = (block_record*)((char*)block +
                                  (bin_t + sizeof(block_record)));
                    block = block->next;
                    block_count--;
                  }

                block->next = NULL;
!               _S_bin[bin].last[0] = block;
              }

!           block = _S_bin[bin].first[0];
!           /*
!            * Remove from list and count down the available counter on
!            * global pool 0.
!            */
!           _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
!           _S_bin[bin].free[0]--;
!           __gthread_mutex_unlock(_S_bin[bin].mutex);

            /*
!            * Now that we have removed the block from the global
!            * freelist we can change owner and update the used
!            * counter for this thread without locking.
             */
!           block->thread_id = thread_id;

            _S_bin[bin].used[thread_id]++;
          }
        else
--- 293,375 ----
  #ifdef __GTHREADS
        if (__gthread_active_p())
          {
+           size_t bin_t = 1 << bin;
+           size_t block_count =
+             _S_chunk_size /(bin_t + sizeof(block_record));
+
            __gthread_mutex_lock(_S_bin[bin].mutex);

            if (_S_bin[bin].first[0] == NULL)
              {
!               /*
!                * No need to hold the lock when we are adding a
!                * whole chunk to our own list
!                */
!               __gthread_mutex_unlock(_S_bin[bin].mutex);
!               _S_bin[bin].first[thread_id] =
!                 (block_record*)malloc(_S_chunk_size);
!               if (!_S_bin[bin].first[thread_id])
!                 __throw_bad_alloc();
!               _S_bin[bin].free[thread_id] = block_count;

                block_count--;
!               block = _S_bin[bin].first[thread_id];

                while (block_count > 0)
                  {
                    block->next = (block_record*)((char*)block +
                                  (bin_t + sizeof(block_record)));
+                   block->thread_id = thread_id;
                    block = block->next;
                    block_count--;
                  }

                block->next = NULL;
!               block->thread_id = thread_id;
!               _S_bin[bin].last[thread_id] = block;
              }
+           else
+             {
+               size_t global_count = 0;
!               while( _S_bin[bin].first[0] != NULL &&
!                      global_count < block_count )
!                 {
!                   block = _S_bin[bin].first[0];
!                   if (_S_bin[bin].first[thread_id] == NULL)
!                     _S_bin[bin].first[thread_id] = block;
!                   else
!                     _S_bin[bin].last[thread_id]->next = block;
!                   _S_bin[bin].last[thread_id] = block;
!
!                   block->thread_id = thread_id;
!
!                   _S_bin[bin].free[thread_id]++;
!
!                   _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
!
!                   global_count++;
!                 }
!
!               block->next = NULL;
!
!               __gthread_mutex_unlock(_S_bin[bin].mutex);
!             }

            /*
!            * Return the first newly added block in our list and
!            * update the counters
             */
!           block = _S_bin[bin].first[thread_id];
!           _S_bin[bin].first[thread_id] =
!             _S_bin[bin].first[thread_id]->next;
!
!           _S_bin[bin].free[thread_id]--;

            _S_bin[bin].used[thread_id]++;
          }
        else
***************
*** 354,362 ****
                size_t bin_t = 1 << bin;
                size_t block_count =
!                 _S_chunk_size / (bin_t + sizeof(block_record));
!
!               _S_bin[bin].free[0] = block_count;

                block_count--;

                block = _S_bin[bin].first[0];
--- 382,388 ----
                size_t bin_t = 1 << bin;
                size_t block_count =
!                 _S_chunk_size / (bin_t + sizeof(block_record));

                block_count--;

                block = _S_bin[bin].first[0];
***************
*** 375,386 ****
            block = _S_bin[bin].first[0];

            /*
!            * Remove from list and count down the available counter on
!            * global pool 0 and increase it's used counter.
             */
            _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
-           _S_bin[bin].free[0]--;
-           _S_bin[bin].used[0]++;
          }
        }
      else
--- 401,409 ----
            block = _S_bin[bin].first[0];

            /*
!            * Remove from list
             */
            _S_bin[bin].first[0] = _S_bin[bin].first[0]->next;
          }
        }
      else
***************
*** 392,399 ****
          block = _S_bin[bin].first[thread_id];
          _S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;

!         _S_bin[bin].free[thread_id]--;
!         _S_bin[bin].used[thread_id]++;
        }

      return static_cast<_Tp*>(static_cast<void*>((char*)block + sizeof(block_record)));
--- 415,428 ----
          block = _S_bin[bin].first[thread_id];
          _S_bin[bin].first[thread_id] = _S_bin[bin].first[thread_id]->next;

!
! #ifdef __GTHREADS
!         if (__gthread_active_p())
!           {
!             _S_bin[bin].free[thread_id]--;
!             _S_bin[bin].used[thread_id]++;
!           }
! #endif
        }

      return static_cast<_Tp*>(static_cast<void*>((char*)block + sizeof(block_record)));
***************
*** 424,430 ****
  #endif

      block_record* block = (block_record*)((char*)__p
!                                           - sizeof(block_record));

      /*
       * This block will always be at the back of a list and thus
--- 453,459 ----
  #endif

      block_record* block = (block_record*)((char*)__p
!                                           - sizeof(block_record));

      /*
       * This block will always be at the back of a list and thus
***************
*** 465,471 ****
                _S_bin[bin].first[thread_id] =
                  _S_bin[bin].first[thread_id]->next;

-               _S_bin[bin].free[0]++;
                _S_bin[bin].free[thread_id]--;

                remove--;
--- 494,499 ----
***************
*** 509,517 ****
              _S_bin[bin].last[0]->next = block;
              _S_bin[bin].last[0] = block;
-
-             _S_bin[bin].free[0]++;
-             _S_bin[bin].used[0]--;
            }
        }
    };
--- 537,542 ----
***************
*** 564,570 ****
  #ifdef __GTHREADS
      if (__gthread_active_p())
        {
!         _S_thread_freelist_first =
            (thread_record*)malloc(sizeof(thread_record) * _S_max_threads);

          if (!_S_thread_freelist_first)
--- 589,595 ----
  #ifdef __GTHREADS
      if (__gthread_active_p())
        {
!         _S_thread_freelist_first =
            (thread_record*)malloc(sizeof(thread_record) * _S_max_threads);

          if (!_S_thread_freelist_first)
***************
*** 578,584 ****
          for (i = 1; i < _S_max_threads; i++)
            {
              _S_thread_freelist_first[i - 1].next =
!               &_S_thread_freelist_first[i];

              _S_thread_freelist_first[i - 1].id = i;
            }
--- 603,609 ----
          for (i = 1; i < _S_max_threads; i++)
            {
              _S_thread_freelist_first[i - 1].next =
!               &_S_thread_freelist_first[i];

              _S_thread_freelist_first[i - 1].id = i;
            }
***************
*** 605,656 ****
      if (!_S_bin)
        __throw_bad_alloc();

!     for (size_t bin = 0; bin < _S_no_of_bins; bin++)
!       {
!         std::size_t __n = _S_max_threads + 1;

          _S_bin[bin].first = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].first)
            __throw_bad_alloc();

          _S_bin[bin].last = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].last)
            __throw_bad_alloc();

!         _S_bin[bin].free = (size_t*) malloc(sizeof(size_t) * __n);

!         if (!_S_bin[bin].free)
!           __throw_bad_alloc();

!         _S_bin[bin].used = (size_t*) malloc(sizeof(size_t) * __n);

!         if (!_S_bin[bin].used)
!           __throw_bad_alloc();

! #ifdef __GTHREADS
!         _S_bin[bin].mutex =(__gthread_mutex_t*) malloc(sizeof(__gthread_mutex_t));

  #ifdef __GTHREAD_MUTEX_INIT
!         {
!           // Do not copy a POSIX/gthr mutex once in use.
!           __gthread_mutex_t __tmp = __GTHREAD_MUTEX_INIT;
!           *_S_bin[bin].mutex = __tmp;
!         }
  #else
!         { __GTHREAD_MUTEX_INIT_FUNCTION (_S_bin[bin].mutex); }
  #endif
  #endif

!         for (size_t thread = 0; thread <= _S_max_threads; thread++)
            {
              _S_bin[bin].first[thread] = NULL;
              _S_bin[bin].last[thread] = NULL;
!             _S_bin[bin].free[thread] = 0;
!             _S_bin[bin].used[thread] = 0;
            }
        }
--- 630,694 ----
      if (!_S_bin)
        __throw_bad_alloc();

!     std::size_t __n = 1;

+ #ifdef __GTHREADS
+     if (__gthread_active_p())
+       __n = _S_max_threads + 1;
+ #endif
+
+     for (size_t bin = 0; bin < _S_no_of_bins; bin++)
+       {
          _S_bin[bin].first = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].first)
            __throw_bad_alloc();

          _S_bin[bin].last = (block_record**)
!           malloc(sizeof(block_record*) * __n);

          if (!_S_bin[bin].last)
            __throw_bad_alloc();

! #ifdef __GTHREADS
!         if (__gthread_active_p())
!           {
!             _S_bin[bin].free = (size_t*) malloc(sizeof(size_t) * __n);

!             if (!_S_bin[bin].free)
!               __throw_bad_alloc();

!             _S_bin[bin].used = (size_t*) malloc(sizeof(size_t) * __n);

!             if (!_S_bin[bin].used)
!               __throw_bad_alloc();

!             _S_bin[bin].mutex =(__gthread_mutex_t*) malloc(sizeof(__gthread_mutex_t));

  #ifdef __GTHREAD_MUTEX_INIT
!             {
!               // Do not copy a POSIX/gthr mutex once in use.
!               __gthread_mutex_t __tmp = __GTHREAD_MUTEX_INIT;
!               *_S_bin[bin].mutex = __tmp;
!             }
  #else
!             { __GTHREAD_MUTEX_INIT_FUNCTION (_S_bin[bin].mutex); }
  #endif
+           }
  #endif

!         for (size_t thread = 0; thread < __n; thread++)
            {
              _S_bin[bin].first[thread] = NULL;
              _S_bin[bin].last[thread] = NULL;
! #ifdef __GTHREADS
!             if (__gthread_active_p())
!               {
!                 _S_bin[bin].free[thread] = 0;
!                 _S_bin[bin].used[thread] = 0;
!               }
! #endif
            }
        }
***************
*** 783,788 ****
--- 821,838 ----
    template<typename _Tp> typename __mt_alloc<_Tp>::bin_record* volatile
    __mt_alloc<_Tp>::_S_bin = NULL;
+
+   template<typename _Tp>
+     inline bool
+     operator==(const __mt_alloc<_Tp>&,
+                const __mt_alloc<_Tp>&)
+     { return true; }
+
+   template<typename _Tp>
+     inline bool
+     operator!=(const __mt_alloc<_Tp>&,
+                const __mt_alloc<_Tp>&)
+     { return false; }
  } // namespace __gnu_cxx

  #endif

--
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=13823
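[Editor's sketch, not part of the original message.]  For anyone reproducing
this, here is a minimal, self-contained sketch of how a std::map can be
pointed at __gnu_cxx::__mt_alloc so that a two-thread run actually exercises
the patched allocator.  It is not the reporter's test program (that source is
not included in this message); the map contents, iteration counts and the
pthread harness are illustrative assumptions only.

    // Minimal sketch (assumed test harness, not the reporter's code): two
    // threads each populate a private std::map whose nodes are allocated
    // through __gnu_cxx::__mt_alloc instead of the default allocator.
    #include <ext/mt_allocator.h>   // GCC-specific extension header
    #include <map>
    #include <functional>
    #include <utility>
    #include <pthread.h>
    #include <cstdio>

    // Workaround mentioned in the report when combining the CVS
    // mt_allocator.h with the gcc 3.3.2 headers; uncomment only if
    // __throw_bad_alloc is reported as undeclared.
    // namespace __gnu_cxx { using std::__throw_bad_alloc; }

    typedef std::map<int, int, std::less<int>,
                     __gnu_cxx::__mt_alloc<std::pair<const int, int> > > mt_map;

    extern "C" void*
    thread_main(void*)
    {
      // Each thread hammers its own map: with a single global freelist the
      // node allocations serialize on one mutex, with per-thread freelists
      // they mostly should not.
      mt_map m;
      for (int pass = 0; pass < 100; ++pass)
        {
          for (int i = 0; i < 10000; ++i)
            m[i] = i;
          m.clear();
        }
      return 0;
    }

    int
    main()
    {
      pthread_t t1, t2;
      pthread_create(&t1, 0, thread_main, 0);
      pthread_create(&t2, 0, thread_main, 0);
      pthread_join(t1, 0);
      pthread_join(t2, 0);
      std::printf("done\n");
      return 0;
    }

Build with something like "g++ -O2 -pthread"; timing the run before and after
the patch on an MP machine should show whether the per-thread freelist change
helps.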