From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 10444 invoked by alias); 1 May 2016 14:18:12 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 10431 invoked by uid 89); 1 May 2016 14:18:11 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.7 required=5.0 tests=AWL,BAYES_00,RCVD_IN_DNSWL_LOW,RP_MATCHES_RCVD,SPF_HELO_PASS,SPF_PASS autolearn=ham version=3.3.2 spammy=sk:_GLIBCX, sk:_glibcx, __r, Ie X-HELO: plane.gmane.org Received: from plane.gmane.org (HELO plane.gmane.org) (80.91.229.3) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES256-SHA encrypted) ESMTPS; Sun, 01 May 2016 14:18:10 +0000 Received: from list by plane.gmane.org with local (Exim 4.69) (envelope-from ) id 1awsCE-0002oy-H7 for gcc-patches@gcc.gnu.org; Sun, 01 May 2016 16:18:06 +0200 Received: from i23017.upc-i.chello.nl ([62.195.23.17]) by main.gmane.org with esmtp (Gmexim 0.1 (Debian)) id 1AlnuQ-0007hv-00 for ; Sun, 01 May 2016 16:18:06 +0200 Received: from eelis by i23017.upc-i.chello.nl with local (Gmexim 0.1 (Debian)) id 1AlnuQ-0007hv-00 for ; Sun, 01 May 2016 16:18:06 +0200 To: gcc-patches@gcc.gnu.org From: Eelis Subject: [patch, libstdc++] std::shuffle: Generate two swap positions at a time if possible Date: Sun, 01 May 2016 14:18:00 -0000 Message-ID: Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="------------080009090103090900070602" X-Mozilla-News-Host: news://news.gmane.org:119 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.3.0 X-IsSubscribed: yes X-SW-Source: 2016-05/txt/msg00010.txt.bz2 This is a multi-part message in MIME format. --------------080009090103090900070602 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-length: 494 Hi, The attached patch optimizes std::shuffle for the very common case where the generator range is large enough that a single invocation can produce two swap positions. This reduces the runtime of the following testcase by 37% on my machine: int main() { std::mt19937 gen; std::vector v; v.reserve(10000); for (int i = 0; i != 10000; ++i) { v.push_back(i); std::shuffle(v.begin(), v.end(), gen); } std::cout << v.front() << '\n'; } Thoughts? Thanks, Eelis --------------080009090103090900070602 Content-Type: text/x-patch; name="double-step-shuffle.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="double-step-shuffle.patch" Content-length: 2440 Index: libstdc++-v3/include/bits/stl_algo.h =================================================================== --- libstdc++-v3/include/bits/stl_algo.h (revision 235680) +++ libstdc++-v3/include/bits/stl_algo.h (working copy) @@ -3708,6 +3708,22 @@ #endif #ifdef _GLIBCXX_USE_C99_STDINT_TR1 + + template + inline _IntType + __generate_random_index_below(_IntType __bound, _UniformRandomNumberGenerator& __g) + { + const _IntType __urngrange = __g.max() - __g.min() + 1; + const _IntType __scaling = __urngrange / __bound; + const _IntType __past = __bound * __scaling; + + for (;;) + { + const _IntType __r = _IntType(__g()) - __g.min(); + if (__r < __past) return __r / __scaling; + } + } + /** * @brief Shuffle the elements of a sequence using a uniform random * number generator. @@ -3740,6 +3756,40 @@ typedef typename std::make_unsigned<_DistanceType>::type __ud_type; typedef typename std::uniform_int_distribution<__ud_type> __distr_type; typedef typename __distr_type::param_type __p_type; + + typedef typename std::remove_reference<_UniformRandomNumberGenerator>::type _Gen; + typedef typename std::common_type::type __uc_type; + + const __uc_type __urngrange = _Gen::max() - _Gen::min() + 1; + const __uc_type __urange = __uc_type(__last - __first); + + if (__urngrange / __urange >= __urange) + // I.e. (__urngrange >= __urange * __urange) but without wrap issues. + { + for (_RandomAccessIterator __i = __first + 1; __i != __last; ) + { + const __uc_type __swap_range = __uc_type(__i - __first) + 1; + + if (__i + 1 == __last) + { + const __uc_type __pos = __generate_random_index_below(__swap_range, __g); + std::iter_swap(__i, __first + __pos); + return; + } + + // Use a single generator invocation to produce swap positions for + // both of the next two elements: + + const __uc_type __comp_range = __swap_range * (__swap_range + 1); + const __uc_type __pospos = __generate_random_index_below(__comp_range, __g); + + std::iter_swap(__i++, __first + (__pospos % __swap_range)); + std::iter_swap(__i++, __first + (__pospos / __swap_range)); + } + + return; + } + __distr_type __d; for (_RandomAccessIterator __i = __first + 1; __i != __last; ++__i) --------------080009090103090900070602--