From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-ed1-x535.google.com (mail-ed1-x535.google.com [IPv6:2a00:1450:4864:20::535]) by sourceware.org (Postfix) with ESMTPS id E115E3858D38 for ; Sat, 1 Oct 2022 16:34:17 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org E115E3858D38 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-ed1-x535.google.com with SMTP id c30so9570903edn.2 for ; Sat, 01 Oct 2022 09:34:17 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; h=content-transfer-encoding:subject:to:user-agent:mime-version:date :message-id:from:from:to:cc:subject:date; bh=CqKk3UmNVuUwD+sDWRd8leDhaYB4aWFmOCbCPpmgHE4=; b=ULo/ySNH4H25X0aKoJUnAUdckBJtgve/88bnpFitDBvPtvUFLvjZTtaHAS17RNxCGU 8l/9Wt0DoknHLn1cTSgGIAFw0Uu9hbcSHyhL4HHxr6LvjNS8GtZI3aZHLeDXycdrDXiu nmOslCMTe6QCvxy1Bxe8sOT+Tef1pFOdaWVfte1ApLb5PRSb3rFIdfEVVgIOnvCC3ZnQ ytyJY9hfzLYNdH8zagSpWW9eqLcWZAyi9wI8zpRztFgDVsA64crxF2FIxhxPiw2lWrl3 VNIf7/WDPxTjS8fG5nshJIbdyMMLb/UxGKFLEReVstqyKo57aoUgoFS8ryhHARyNGXR6 CTwg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; h=content-transfer-encoding:subject:to:user-agent:mime-version:date :message-id:from:x-gm-message-state:from:to:cc:subject:date; bh=CqKk3UmNVuUwD+sDWRd8leDhaYB4aWFmOCbCPpmgHE4=; b=0vOQ/c1mtnZwsTib3PO/LkqPgIOiFRrLQB/tcaCt06lZxpCKo/Uo1n8TIVq4M4iwzO Oq1LACzqYDHHUh8tAAac4KnRhj+IkFWWfTJs045L0hVMXcI9w4wKJ9oolK6Vf4TNDuIg NoJ5ALj8a1ZhMda4JAK84qJnGqvwwjGQ85VfmmwzIlG8q4bOI/dH1VeYY+PRMQYEwzl5 F5Gbm/UUg6l4ZHmu/hJIGV3yLe5ppJ+YmyikXIFuX+cfuN0W5zLHTi/5VcI7P3GlrF1L tvfXkHg4jjvWI17csJ8UNS+TVZLNw+Ohyr3pXa/b7T8LbxTAF28Yqb7rGGT7sbegr3x5 bqdA== X-Gm-Message-State: ACrzQf3Oabd47l82e+dlPDlXyl71MXwS1ff9InO7PEYg0mZCUE9mESpM Hb7ghcJFdhdECsLHlcKpsceYFAinfVwq1A== X-Google-Smtp-Source: 
AMsMyM6tdEVeDdPzIDOKzl4QLEpBKhqvAfJPGqEyRI1UhxQadCrDkKF5hoHWgDJZe45ery3BX/IZ+A== X-Received: by 2002:a05:6402:1761:b0:455:37bb:93c3 with SMTP id da1-20020a056402176100b0045537bb93c3mr11768555edb.403.1664642056646; Sat, 01 Oct 2022 09:34:16 -0700 (PDT) Received: from [192.168.0.71] (ip-176-199-153-100.um44.pools.vodafone-ip.de. [176.199.153.100]) by smtp.gmail.com with ESMTPSA id l9-20020a1709063d2900b0076f0ab594e9sm2925376ejf.73.2022.10.01.09.34.16 (version=TLS1_3 cipher=TLS_AES_128_GCM_SHA256 bits=128/128); Sat, 01 Oct 2022 09:34:16 -0700 (PDT) From: "=?UTF-8?Q?Oliver_Sch=c3=a4dlich?=" X-Google-Original-From: =?UTF-8?Q?Oliver_Sch=c3=a4dlich?= Message-ID: <211e8b64-519a-6037-62fc-7fcac1983ac4@gmail.com> Date: Sat, 1 Oct 2022 18:34:16 +0200 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Thunderbird/102.3.1 To: libc-help@sourceware.org Subject: More efficient fmod() Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 8bit X-Spam-Status: No, score=0.0 required=5.0 tests=BAYES_00,BODY_8BITS,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: I found that fmod() could be faster. 
/*
 * Implementation proposed in this post: fmod() computed directly on the
 * IEEE-754 binary64 bit patterns, using integer arithmetic only.
 */
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER)
    #include <intrin.h>
#endif

#if defined(__GNUC__) || defined(__clang__)
    // !! collapses the condition to 0/1 before it is converted to the `long`
    // that __builtin_expect() takes, so 64-bit operands are never truncated.
    #define likely(x) __builtin_expect(!!(x), 1)
    #define unlikely(x) __builtin_expect(!!(x), 0)
#else
    #define likely(x) (x)
    #define unlikely(x) (x)
#endif

// Bit layout of a binary64 value.
#define SIGN_BIT ((uint64_t)1 << 63)
#define EXP_MASK ((uint64_t)0x7FF << 52)
#define IMPLICIT_BIT ((uint64_t)1 << 52)
#define IMPLCIT_BIT IMPLICIT_BIT // earlier, misspelled name kept as an alias
#define MANT_MASK (IMPLICIT_BIT - 1)
#define QNAN_BIT (IMPLICIT_BIT >> 1)

// Returns the bit pattern of d.
static inline uint64_t bin( double d )
{
    uint64_t u;
    memcpy( &u, &d, sizeof d );
    return u;
}

// Returns the double whose bit pattern is u.
static inline double dbl( uint64_t u )
{
    double d;
    memcpy( &d, &u, sizeof u );
    return d;
}

// Raises the "invalid" floating-point exception and returns the double whose
// bit pattern is u.  The flag is raised by executing a real invalid operation
// (0.0 / 0.0) on volatile operands, so the compiler cannot fold it away and
// no call into libm is needed.
static inline double invalid( uint64_t u )
{
    volatile double zero = 0.0;
    volatile double raised = zero / zero;
    (void)raised;
    return dbl( u );
}

// Non-zero if the bit pattern u encodes a NaN (quiet or signaling).
static inline int isNaN( uint64_t u )
{
    return (u & ~SIGN_BIT) > EXP_MASK;
}

// Non-zero if the bit pattern u encodes a signaling NaN.
static inline int isSNaN( uint64_t u )
{
    return isNaN( u ) && !(u & QNAN_BIT);
}

// Returns the negative quiet NaN carrying the payload of nanBits.  The
// "invalid" exception is raised only if `signaling` is non-zero: quiet NaN
// operands propagate without raising any exception.
static inline double quietNaN( uint64_t nanBits, int signaling )
{
    uint64_t const quiet = SIGN_BIT | nanBits | QNAN_BIT;
    return signaling ? invalid( quiet ) : dbl( quiet );
}

// Shifts *mant left until bit 52 (the implicit-one position) is set and
// lowers *exponent by the number of positions shifted.
// Precondition: 0 < *mant < 2^53.
static inline void normalize( uint64_t *mant, int *exponent )
{
#if defined(__GNUC__) || defined(__clang__)
    // 64-bit count-leading-zeros; bit 52 set means exactly 11 leading zeros
    unsigned bits = (unsigned)__builtin_clzll( *mant ) - 11;
#elif defined(_MSC_VER)
    // _BitScanReverse64 yields the INDEX of the highest set bit, not a
    // leading-zero count, so the distance to bit 52 is 52 - index
    unsigned long msb;
    _BitScanReverse64( &msb, *mant );
    unsigned bits = 52 - (unsigned)msb;
#else
    unsigned bits = 0;
    for( uint64_t m = *mant; !(m & IMPLICIT_BIT); m <<= 1 )
        ++bits;
#endif
    *mant <<= bits;
    *exponent -= (int)bits;
}

// Floating-point remainder of counter / denominator, like fmod():
// the result has the sign of `counter` and a magnitude smaller than that of
// `denominator`.
//
// Special cases:
//   NaN % y, x % NaN  -> quiet NaN (payload of the first NaN operand);
//                        "invalid" is raised only for a signaling NaN
//   +/-Inf % y        -> NaN, raises "invalid"
//   x % +/-0          -> NaN, raises "invalid"
//   x % +/-Inf        -> x   (x finite)
//   +/-0 % y          -> +/-0 (y not zero)
double myFmodC( double counter, double denominator )
{
    uint64_t const bCounter = bin( counter );
    uint64_t const bDenom = bin( denominator );
    // the remainder always carries the sign of the dividend
    uint64_t const sign = bCounter & SIGN_BIT;

    if( unlikely((bCounter & EXP_MASK) == EXP_MASK) )
    {
        if( bCounter & MANT_MASK )
            // NaN % y: the first operand determines the payload
            return quietNaN( bCounter, isSNaN( bCounter ) || isSNaN( bDenom ) );
        if( isNaN( bDenom ) )
            // +/-Inf % NaN
            return quietNaN( bDenom, isSNaN( bDenom ) );
        // +/-Inf % y is an invalid operation
        return invalid( SIGN_BIT | EXP_MASK | QNAN_BIT );
    }
    if( unlikely((bDenom & EXP_MASK) == EXP_MASK) )
    {
        if( likely(!(bDenom & MANT_MASK)) )
            // x % +/-Inf = x
            return counter;
        // x % NaN
        return quietNaN( bDenom, isSNaN( bDenom ) );
    }

    int counterExp = (int)((bCounter & EXP_MASK) >> 52);
    int denomExp = (int)((bDenom & EXP_MASK) >> 52);
    // normal numbers get their implicit leading one made explicit
    uint64_t counterMant = (uint64_t)!!counterExp << 52 | (bCounter & MANT_MASK);
    uint64_t denomMant = (uint64_t)!!denomExp << 52 | (bDenom & MANT_MASK);

    if( unlikely(!denomExp) )
    {
        if( likely(!denomMant) )
            // x % +/-0.0 (including 0 % 0) is an invalid operation
            return invalid( SIGN_BIT | EXP_MASK | QNAN_BIT );
        // denormal denominator: its exponent field 0 means the same scale as
        // exponent 1 without the implicit bit, hence the +1 after normalizing
        normalize( &denomMant, &denomExp );
        ++denomExp;
    }
    if( unlikely(!counterExp) )
    {
        if( likely(!counterMant) )
            // +/-0.0 % y = +/-0.0
            return counter;
        // denormal counter, same treatment as the denominator
        normalize( &counterMant, &counterExp );
        ++counterExp;
    }

    // Long division on the mantissas.  Both mantissas have bit 52 set, so at
    // equal exponents a single subtraction (after at most one doubling of the
    // remainder) removes the largest admissible multiple of the denominator.
    int remExp = counterExp;
    uint64_t remainderMant = counterMant;
    for( ; ; )
    {
        int const below = remainderMant < denomMant;
        if( unlikely(remExp - below < denomExp) )
            // the remainder is already smaller than the denominator
            break;
        remExp -= below;
        remainderMant <<= below;
        remainderMant -= denomMant;
        if( unlikely(!remainderMant) )
            // exact multiple: zero with the sign of the dividend
            return dbl( sign );
        normalize( &remainderMant, &remExp );
    }

    if( unlikely(remExp <= 0) )
    {
        // denormal result: shift the mantissa back to the fixed denormal scale
        remainderMant >>= 1 - remExp;
        remExp = 0;
    }
    return dbl( sign | (uint64_t)remExp << 52 | (remainderMant & MANT_MASK) );
}

/*
 * Note from the original post, measured with the code as originally posted:
 * for random pairs of doubles it took nearly 40% less time on the author's
 * Zen1 CPU than the then-current glibc code.
 */