public inbox for libc-help@sourceware.org
* More efficient fmod()
@ 2022-10-01 16:34 Oliver Schädlich
  2022-10-01 20:19 ` Adhemerval Zanella Netto
  0 siblings, 1 reply; 2+ messages in thread
From: Oliver Schädlich @ 2022-10-01 16:34 UTC (permalink / raw)
  To: libc-help

I found that fmod() could be faster.
This is my implementation:

#include <stdint.h>
#include <string.h>
#include <fenv.h>
#if defined(_MSC_VER)
     #include <intrin.h>
#endif

#if defined(__GNUC__) || defined(__clang__)
     #define likely(x) __builtin_expect((x), 1)
     #define unlikely(x) __builtin_expect((x), 0)
#else
     #define likely(x) (x)
     #define unlikely(x) (x)
#endif

static inline uint64_t bin( double d )
{
     uint64_t u;
     memcpy( &u, &d, sizeof d );
     return u;
}

static inline double dbl( uint64_t u )
{
     double d;
     memcpy( &d, &u, sizeof u );
     return d;
}

static inline double invalid( uint64_t u )
{
     feraiseexcept( FE_INVALID );
     return dbl( u );
}

#define SIGN_BIT ((uint64_t)1 << 63)
#define EXP_MASK ((uint64_t)0x7FF << 52)
#define IMPLICIT_BIT ((uint64_t)1 << 52)
#define MANT_MASK (IMPLICIT_BIT - 1)
#define QNAN_BIT (IMPLICIT_BIT >> 1)

static inline void normalize( uint64_t *mant, int *exp )
{
     // shift mant so that its implicit bit (bit 52) is set;
     // callers guarantee mant != 0
#if defined(__GNUC__) || defined(__clang__)
     unsigned bits;
     bits = __builtin_clzll( *mant ) - 11;
     *mant <<= bits;
#elif defined(_MSC_VER)
     unsigned long index;
     unsigned bits;
     _BitScanReverse64( &index, *mant );
     bits = 52 - index;
     *mant <<= bits;
#else
     unsigned bits;
     for( bits = 0; !(*mant & IMPLICIT_BIT); *mant <<= 1, ++bits );
#endif
     *exp -= bits;
}

double myFmodC( double counter, double denominator )
{
     uint64_t const
         bCounter = bin( counter ),
         bDenom = bin( denominator );
     uint64_t const sign = (bCounter ^ bDenom) & SIGN_BIT;
     if( unlikely((bCounter & EXP_MASK) == EXP_MASK) )
         // +/-[Inf|QNaN|SNaN] % ... = -QNaN
         // follow SSE/AVX-rules, first NaN rules, i.e.
         // first parameter determines non-SNaN/QNaN-bits
         return invalid( SIGN_BIT | bCounter | QNAN_BIT );
     if( unlikely((bDenom & EXP_MASK) == EXP_MASK) )
         // +/-x % +/-[Inf|QNaN|SNaN]
         if( likely(!(bDenom & MANT_MASK)) )
             // +/-x % +/-Inf = -/+x
             return dbl( sign | bCounter & ~SIGN_BIT );
         else
             // +/-x % +/-[QNaN|SNaN] = -NaN
             return invalid( SIGN_BIT | bDenom | QNAN_BIT );
     int
         counterExp = (bCounter & EXP_MASK) >> 52,
         denomExp = (bDenom & EXP_MASK) >> 52;
     uint64_t
         counterMant = (uint64_t)!!counterExp << 52 | bCounter & MANT_MASK,
         denomMant = (uint64_t)!!denomExp << 52 | bDenom & MANT_MASK;
     if( unlikely(!counterExp) )
         // counter is denormal
         if( likely(!counterMant) )
             // counter == +/-0.0
             if( likely(denomMant) )
                 // +/-0.0 % +/-x = -/+0.0
                 return dbl( sign );
             else
                 // +/-0.0 % +/-0.0 = -QNaN
                 return invalid( SIGN_BIT | EXP_MASK | QNAN_BIT );
         else
             // normalize counter
             normalize( &counterMant, &counterExp ),
             ++counterExp;
     if( unlikely(!denomExp) )
         // denominator is denormal
         if( likely(!denomMant) )
             // +/-x % +/-0.0 = -/+QNaN
             return invalid( SIGN_BIT | EXP_MASK | QNAN_BIT );
         else
             // normalize denominator
             normalize( &denomMant, &denomExp ),
             ++denomExp;
     int exp = counterExp;
     uint64_t remainderMant = counterMant;
     for( ; ; )
     {
         int below = remainderMant < denomMant;
         if( unlikely(exp - below < denomExp) )
             break;
         exp -= below;
         remainderMant <<= below;
         if( unlikely(!(remainderMant -= denomMant)) )
         {
             exp = 0;
             break;
         }
         normalize( &remainderMant, &exp );
     }
     if( unlikely(exp <= 0) )
         // denormal result
         remainderMant >>= -exp + 1,
         exp = 0;
     return dbl( sign | (uint64_t)exp << 52 | remainderMant & MANT_MASK );
}
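
A quick way to sanity-check the code is to compare it against the
library fmod() on a few edge cases. The following is only a sketch
(the case list and the same_bits() helper are illustrative), compiled
together with the code above as a small test program:

#include <math.h>
#include <stdio.h>

// bitwise comparison so that the sign of zero is checked as well
static int same_bits( double a, double b )
{
     return bin( a ) == bin( b );
}

int main( void )
{
     // NaN inputs are omitted; the SSE/AVX-style NaN handling above
     // need not match the library's sign/payload convention
     static double const cases[][2] = {
          { 5.5, 2.0 }, { -5.5, 2.0 }, { 0.0, 3.0 }, { -0.0, 3.0 },
          { 1.0, INFINITY }, { 4e-320, 3e-320 }, { 1e308, 1e-308 }
     };
     for( size_t i = 0; i != sizeof cases / sizeof cases[0]; ++i )
     {
          double x = cases[i][0], y = cases[i][1];
          if( !same_bits( myFmodC( x, y ), fmod( x, y ) ) )
               printf( "mismatch: fmod(%g, %g) = %g, mine = %g\n",
                       x, y, fmod( x, y ), myFmodC( x, y ) );
     }
     return 0;
}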

With random pairs of doubles the above code takes nearly 40% less
time on my Zen 1 CPU than the current glibc code.
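
A minimal timing sketch for such a comparison could look like the
following (again compiled together with the code above, as a separate
test program from the one further up; the rand()-based pair generator,
the buffer size and the clock_gettime() timing are only illustrative
assumptions, not necessarily the setup behind the numbers above):

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

// crude finite-double generator built from two rand() calls; not a
// full 64-bit RNG, but enough to cover many exponent combinations
static double randomDouble( void )
{
     for( ; ; )
     {
          uint64_t u = (uint64_t)rand() << 32 ^ (uint64_t)rand();
          double d = dbl( u );
          if( isfinite( d ) )
               return d;
     }
}

static double seconds( struct timespec from, struct timespec to )
{
     return to.tv_sec - from.tv_sec + (to.tv_nsec - from.tv_nsec) * 1e-9;
}

int main( void )
{
     enum { N = 1 << 20 };
     static double a[N], b[N];
     for( size_t i = 0; i != N; ++i )
          a[i] = randomDouble(), b[i] = randomDouble();

     // accumulate into a volatile so neither loop is optimized away
     volatile double sink = 0.0;
     struct timespec t0, t1;

     clock_gettime( CLOCK_MONOTONIC, &t0 );
     for( size_t i = 0; i != N; ++i )
          sink += fmod( a[i], b[i] );
     clock_gettime( CLOCK_MONOTONIC, &t1 );
     double tLib = seconds( t0, t1 );

     clock_gettime( CLOCK_MONOTONIC, &t0 );
     for( size_t i = 0; i != N; ++i )
          sink += myFmodC( a[i], b[i] );
     clock_gettime( CLOCK_MONOTONIC, &t1 );
     double tMine = seconds( t0, t1 );

     printf( "glibc fmod: %.3f s   myFmodC: %.3f s\n", tLib, tMine );
     return 0;
}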


