From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-oi1-x230.google.com (mail-oi1-x230.google.com [IPv6:2607:f8b0:4864:20::230]) by sourceware.org (Postfix) with ESMTPS id 3C47638582A9 for ; Mon, 1 Aug 2022 19:20:33 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 3C47638582A9 Received: by mail-oi1-x230.google.com with SMTP id p132so14140229oif.9 for ; Mon, 01 Aug 2022 12:20:33 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:cc:to:subject:message-id:date:from :in-reply-to:references:mime-version:x-gm-message-state:from:to:cc; bh=32SZmH7bq7dcsoi4F7rxmMsHQpsXBEiyk5fbLkqgT6A=; b=dcmbeDxwQYjDd4vzi30fEXmZ73ykVyAughRKfU1eNcOGGWkQ0WVhJ8cf7mrkOJsfa9 DyepPMNE2oCMLh/wNMXMg6KKvJ5xPY+wtPdwnlSmh07rWhPg+y3gv8N0tQdTHSa4BHCG D/ygrelnlEyv6gGj30DGiJiMQx2T6aqnd96NRd3nE7h3jbFED/lV8MJ8B3k070yav2BF wOKXt3zFX6hzU4uC3dcQOPQ1uebbLn4MBmv9LFBh5ovbY4y6Wn2eoomh0scDc+eHn5JQ rS+yePSUBV0JCmjSAtPN0BOK9lhebZ+b9UPK7Sq4QDBQgZHMZFys7aBjDYk81fnaIJf8 CAmA== X-Gm-Message-State: ACgBeo3LUHyDEkGMZEETx30VOcTDQXew3fPEtmRwkBrVAoS1JYQkHbdy JCIow4zjSILKgyTnvei6R3SVnNR+Qli8Z8If4xg= X-Google-Smtp-Source: AA6agR691JiBGbkqb/4D5T7v4T8BIbSo5kzcfgQ38gv15JGSMzRIYeZAeRMbcyvG/Gm586DBu50FclQ7Dlh4uShpe1w= X-Received: by 2002:a05:6808:1312:b0:341:cbcd:5387 with SMTP id y18-20020a056808131200b00341cbcd5387mr616261oiv.175.1659381632427; Mon, 01 Aug 2022 12:20:32 -0700 (PDT) MIME-Version: 1.0 References: <20220729123211.876374-1-adhemerval.zanella@linaro.org> In-Reply-To: <20220729123211.876374-1-adhemerval.zanella@linaro.org> From: Noah Goldstein Date: Tue, 2 Aug 2022 03:20:20 +0800 Message-ID: Subject: Re: [PATCH v2] stdlib: Simplify arc4random_uniform To: Adhemerval Zanella Cc: GNU C Library , Florian Weimer , "Carlos O'Donell" , Yann Droneaud , Paul Eggert Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Spam-Status: No, score=-9.5 required=5.0 tests=BAYES_00, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, FREEMAIL_FROM, GIT_PATCH_0, KAM_SHORT, RCVD_IN_DNSWL_NONE, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org X-BeenThere: libc-alpha@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Libc-alpha mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 01 Aug 2022 19:20:35 -0000 On Fri, Jul 29, 2022 at 8:32 PM Adhemerval Zanella via Libc-alpha wrote: > > It uses the bitmask with rejection [1], which calculates a mask > being the lowest power of two bounding the request upper bound, > successively queries new random values, and rejects values > outside the requested range. > > Performance-wise, there is no much gain in trying to conserve > bits since arc4random is wrapper on getrandom syscall. It should > be cheaper to just query a uint32_t value. The algorithm also > avoids modulo and divide operations, which might be costly > depending of the architecture. > > [1] https://www.pcg-random.org/posts/bounded-rands.html > > Reviewed-by: Yann Droneaud > --- > v2: Fixed typos in commit message and simplify loop. > --- > stdlib/arc4random_uniform.c | 129 +++++++++--------------------------- > 1 file changed, 30 insertions(+), 99 deletions(-) > > diff --git a/stdlib/arc4random_uniform.c b/stdlib/arc4random_uniform.c > index 1326dfa593..5aa98d1c13 100644 > --- a/stdlib/arc4random_uniform.c > +++ b/stdlib/arc4random_uniform.c > @@ -17,38 +17,19 @@ > License along with the GNU C Library; if not, see > . */ > > -#include > -#include > #include > #include > > -/* Return the number of bytes which cover values up to the limit. */ > -__attribute__ ((const)) > -static uint32_t > -byte_count (uint32_t n) > -{ > - if (n < (1U << 8)) > - return 1; > - else if (n < (1U << 16)) > - return 2; > - else if (n < (1U << 24)) > - return 3; > - else > - return 4; > -} > +/* Return a uniformly distributed random number less than N. The algori= thm > + calculates a mask being the lowest power of two bounding the upper bo= und > + N, successively queries new random values, and rejects values outside= of > + the request range. > > -/* Fill the lower bits of the result with randomness, according to the > - number of bytes requested. */ > -static void > -random_bytes (uint32_t *result, uint32_t byte_count) > -{ > - *result =3D 0; > - unsigned char *ptr =3D (unsigned char *) result; > - if (__BYTE_ORDER =3D=3D __BIG_ENDIAN) > - ptr +=3D 4 - byte_count; > - __arc4random_buf (ptr, byte_count); > -} > + For reject values, it also tries if the remaining entropy could fit o= n > + the asked range after range adjustment. > > + The algorithm avoids modulo and divide operations, which might be cos= tly > + depending on the architecture. */ > uint32_t > __arc4random_uniform (uint32_t n) > { > @@ -57,83 +38,33 @@ __arc4random_uniform (uint32_t n) > only possible result for limit 1. */ > return 0; > > - /* The bits variable serves as a source for bits. Prefetch the > - minimum number of bytes needed. */ > - uint32_t count =3D byte_count (n); > - uint32_t bits_length =3D count * CHAR_BIT; > - uint32_t bits; > - random_bytes (&bits, count); > - > /* Powers of two are easy. */ > if (powerof2 (n)) > - return bits & (n - 1); > - > - /* The general case. This algorithm follows J=C3=A9r=C3=A9mie Lumbros= o, > - Optimal Discrete Uniform Generation from Coin Flips, and > - Applications (2013), who credits Donald E. Knuth and Andrew > - C. Yao, The complexity of nonuniform random number generation > - (1976), for solving the general case. > + return __arc4random () & (n - 1); > > - The implementation below unrolls the initialization stage of the > - loop, where v is less than n. */ > + /* mask is the smallest power of 2 minus 1 number larger than n. */ > + int z =3D __builtin_clz (n); > + uint32_t mask =3D ~UINT32_C(0) >> z; > + int bits =3D CHAR_BIT * sizeof (uint32_t) - z; > > - /* Use 64-bit variables even though the intermediate results are > - never larger than 33 bits. This ensures the code is easier to > - compile on 64-bit architectures. */ > - uint64_t v; > - uint64_t c; > - > - /* Initialize v and c. v is the smallest power of 2 which is larger > - than n.*/ > - { > - uint32_t log2p1 =3D 32 - __builtin_clz (n); > - v =3D 1ULL << log2p1; > - c =3D bits & (v - 1); > - bits >>=3D log2p1; > - bits_length -=3D log2p1; > - } > - > - /* At the start of the loop, c is uniformly distributed within the > - half-open interval [0, v), and v < 2n < 2**33. */ > - while (true) > + while (1) > { > - if (v >=3D n) > - { > - /* If the candidate is less than n, accept it. */ > - if (c < n) > - /* c is uniformly distributed on [0, n). */ > - return c; > - else > - { > - /* c is uniformly distributed on [n, v). */ > - v -=3D n; > - c -=3D n; > - /* The distribution was shifted, so c is uniformly > - distributed on [0, v) again. */ > - } > - } > - /* v < n here. */ > - > - /* Replenish the bit source if necessary. */ > - if (bits_length =3D=3D 0) > - { > - /* Overwrite the least significant byte. */ > - random_bytes (&bits, 1); > - bits_length =3D CHAR_BIT; > - } > - > - /* Double the range. No overflow because v < n < 2**32. */ > - v *=3D 2; > - /* v < 2n here. */ > - > - /* Extract a bit and append it to c. c remains less than v and > - thus 2**33. */ > - c =3D (c << 1) | (bits & 1); > - bits >>=3D 1; > - --bits_length; > - > - /* At this point, c is uniformly distributed on [0, v) again, > - and v < 2n < 2**33. */ > + uint32_t value =3D __arc4random (); > + > + /* Return if the lower power of 2 minus 1 satisfy the condition. = */ > + uint32_t r =3D value & mask; > + if (r < n) > + return r; > + > + /* Otherwise check if remaining bits of entropy provides fits in t= he > + bound. */ > + for (int bits_left =3D z; bits_left >=3D bits; bits_left -=3D bits= ) > + { > + value >>=3D bits; Can this just be shift by 1 and repeat (32 - z) times or does that introduce bias (not seeing exactly why it would)? > + r =3D value & mask; > + if (r < n) > + return r; > + } > } > } > libc_hidden_def (__arc4random_uniform) > -- > 2.34.1 >