From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mail-lf1-x132.google.com (mail-lf1-x132.google.com [IPv6:2a00:1450:4864:20::132]) by sourceware.org (Postfix) with ESMTPS id 06AFF3858C39 for ; Tue, 7 Mar 2023 15:44:56 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.2 sourceware.org 06AFF3858C39 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=gmail.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=gmail.com Received: by mail-lf1-x132.google.com with SMTP id bi9so17664746lfb.2 for ; Tue, 07 Mar 2023 07:44:55 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20210112; t=1678203894; h=to:subject:message-id:date:from:mime-version:from:to:cc:subject :date:message-id:reply-to; bh=T2Vp37XDmmFW6pK09GUi7J7D+Fnb9XBNS8gmTlD5v0M=; b=qXbFEESKR67/NKFzQECCEO3ibl+WU+Keqp0hbu24W0X8wvGMdesb/aiKMJXdIOGVkH FxMOf3QMTePiUJyWxmwmnm9v+Gweyh42qN81JyJXKfedHZRzmEIqsUSYD3eTQYcfkUe7 1xCShgYZM7Eu64yT7MxAcJsplnHkuzB09U5IGt3GSaxfjLApuGmpUXSsCzItJYJDFWOi KPWxMlInMJ7gjT0zzTtfphVA7POcSZgXOkZHpSrJV44CRU3U3EMtXMB7vU8EoxbAW9oo oO7SIrm0mRilHawhXlOjMpIn7zc9iAPpvMBmhyB7vq8aUyn9siFbhHEKfFK5XkJq3M2d qvdw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20210112; t=1678203894; h=to:subject:message-id:date:from:mime-version:x-gm-message-state :from:to:cc:subject:date:message-id:reply-to; bh=T2Vp37XDmmFW6pK09GUi7J7D+Fnb9XBNS8gmTlD5v0M=; b=WF30DVdJERByO6lsqbfNqsnEyEDPKC2xv6y2ld58DhMgteFbhpI7vuG46nMhPMQVbR TgzPk/dyeo5YsrZIlyugJjC6w4W2CFN5fwFI+BBdd8YJu01gs2MS8nSNcRlhKJGUfLsr v78RGamxloZToRCMtNfuxGTVB+jW06zhR7tiRMo/EqINcao4GGRWQHu8S0J4Ww0Iwfpm fUDbk/cos12OFKRCeesDkj3VLkqO2JXttUTCdKHoazt5ChU3x8Tm8pOu98JQsC2pWzgP lXVAT0faRYVP7wquRHVYNkBHzwy4UW5V6iLjvHf7KSn/TKhhlWJJTVFDyA2wDTeU6Xh0 GUJw== X-Gm-Message-State: AO0yUKXCez2lxIb7/sUk9MwsEw3rial0lG01XbIco56w6vpbcHyEO2e0 JurN4qUbg5UbcJ72Mf5meYZKJLVUbawC449A3u9TvJLZ4g== X-Google-Smtp-Source: 
AK7set8Va+zG5tNbpJ/q6KEMX4NsCerCA9zgKiHK9DEH/flkT1kzJ6Y7KQlYsttVfoMvgBLv8mjhmr05Fu90jkO5sjk= X-Received: by 2002:a05:6512:51a:b0:4d5:ca32:6ae3 with SMTP id o26-20020a056512051a00b004d5ca326ae3mr4412527lfb.3.1678203894533; Tue, 07 Mar 2023 07:44:54 -0800 (PST) MIME-Version: 1.0 From: Edison von Myositis Date: Tue, 7 Mar 2023 16:46:11 +0100 Message-ID: Subject: Improvement of fmod() To: libc-alpha@sourceware.org Content-Type: multipart/alternative; boundary="000000000000fe931705f6514aa9" X-Spam-Status: No, score=-0.1 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,FREEMAIL_FROM,HTML_MESSAGE,RCVD_IN_DNSWL_NONE,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: --000000000000fe931705f6514aa9 Content-Type: text/plain; charset="UTF-8" I've implemented fmod() so that it runs 105% to 150% faster than the 30-year-old implementation from Sun. It requires something like SETcc and BSR / LZCNT on x86. 
/* Integer-only fmod() for IEEE-754 binary64 doubles. */
#include <stdint.h>
#include <string.h>
#if defined(_MSC_VER)
#include <intrin.h>
#endif

/* Branch-prediction hints; they degrade to no-ops where the builtin is missing. */
#if defined(__GNUC__) || defined(__clang__)
#define LIKELY(x)   __builtin_expect(!!(x), 1)
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define LIKELY(x)   (x)
#define UNLIKELY(x) (x)
#endif

/* Bit layout of a binary64 value: 1 sign bit, 11 exponent bits, 52 mantissa bits. */
#define MAX_EXP      (0x7FF)
#define SIGN_BIT     ((uint64_t)1 << 63)
#define EXP_MASK     ((uint64_t)MAX_EXP << 52)
#define IMPLICIT_BIT ((uint64_t)1 << 52)
#define MANT_MASK    (IMPLICIT_BIT - 1)

/* Both predicates expect a bit pattern whose sign bit has already been cleared. */
#define HAS_MAX_EXP(b)  ((b) >= EXP_MASK)        /* Inf or NaN */
#define HAS_INF_MANT(b) (!((b) & MANT_MASK))     /* mantissa field is all zero */

/* Returns the raw bit pattern of d (memcpy avoids strict-aliasing problems). */
static inline uint64_t bin(double d)
{
    uint64_t u;
    memcpy(&u, &d, sizeof d);
    return u;
}

/* Returns the double whose raw bit pattern is u. */
static inline double dbl(uint64_t u)
{
    double d;
    memcpy(&d, &u, sizeof u);
    return d;
}

/* Counts leading zero bits of v; v must be non-zero. */
static inline int clz64(uint64_t v)
{
#if defined(_MSC_VER) && !defined(__clang__)
    /* NOTE(review): _BitScanReverse64 is only offered on 64-bit MSVC targets. */
    unsigned long idx;
    _BitScanReverse64(&idx, v);
    return 63 - (int)idx;
#else
    return __builtin_clzll(v);
#endif
}

/*
 * Shifts *mant left until its highest set bit sits at bit 52 (the implicit-bit
 * position) and lowers *exp by the same amount.  *mant must be non-zero and
 * must not have any bit above bit 52 set.
 */
static inline void normalize(uint64_t *mant, int *exp)
{
    /* Signed on purpose: *exp may become negative and must stay a plain int. */
    int bits = clz64(*mant) - 11;
    *mant <<= bits;
    *exp -= bits;
}

/*
 * Computes the floating-point remainder of counter / denom using integer
 * arithmetic on the operands' bit patterns.
 *
 * counter  dividend; the result carries its sign.
 * denom    divisor; its sign is ignored.
 *
 * Returns NaN when denom is zero, when counter is Inf/NaN, or when denom is
 * NaN; returns counter unchanged when denom is infinite or counter is zero.
 */
double myFmodC(double counter, double denom)
{
    uint64_t bCounter = bin(counter);
    const uint64_t bDenom = bin(denom) & ~SIGN_BIT;
    const uint64_t bSign = bCounter & SIGN_BIT;
    bCounter &= ~SIGN_BIT;

    /* x mod 0, Inf mod y and NaN mod y: let the FPU produce the NaN. */
    if (UNLIKELY(!bDenom) || UNLIKELY(HAS_MAX_EXP(bCounter))) {
        return (counter * denom) / (counter * denom);
    }
    if (UNLIKELY(HAS_MAX_EXP(bDenom))) {
        if (LIKELY(HAS_INF_MANT(bDenom))) {
            return counter;                              /* finite mod Inf */
        }
        return (counter * denom) / (counter * denom);    /* finite mod NaN */
    }
    if (UNLIKELY(!bCounter)) {
        return counter;                                  /* +-0 mod y keeps its sign */
    }

    int counterExp = (int)((bCounter >> 52) & MAX_EXP);
    int denomExp = (int)((bDenom >> 52) & MAX_EXP);
    /* Normal numbers get the implicit leading one; subnormals do not. */
    uint64_t counterMant = ((uint64_t)(counterExp != 0) << 52) | (bCounter & MANT_MASK);
    uint64_t denomMant = ((uint64_t)(denomExp != 0) << 52) | (bDenom & MANT_MASK);

    if (UNLIKELY(!counterExp)) {
        /* Subnormal dividend: bring it into the normalized form used below. */
        normalize(&counterMant, &counterExp);
        ++counterExp;
    }
    if (UNLIKELY(!denomExp)) {
        /* Subnormal divisor: same treatment. */
        normalize(&denomMant, &denomExp);
        ++denomExp;
    }

    int remExp = counterExp;
    uint64_t remMant = counterMant;
    for (;;) {
        /* 1 when the divisor mantissa only fits after scaling the remainder by two. */
        const int below = remMant < denomMant;
        if (UNLIKELY(remExp - below < denomExp)) {
            break;                      /* remainder is now smaller than the divisor */
        }
        remExp -= below;
        remMant <<= below;
        remMant -= denomMant;
        if (UNLIKELY(!remMant)) {
            remExp = 0;                 /* exact multiple: result is a signed zero */
            break;
        }
        normalize(&remMant, &remExp);
    }

    if (UNLIKELY(remExp <= 0)) {
        /* Subnormal result: shift the mantissa back and use exponent field 0. */
        remMant >>= -remExp + 1;
        remExp = 0;
    }
    return dbl(bSign | ((uint64_t)remExp << 52) | (remMant & MANT_MASK));
}

/*
 * Author's note from the original message:
 * The results are binary-compatible to those of glibc i.e. all the (S)NaN- and
 * Inf-results are all the same and all finite results are the same.
 *
 * --000000000000fe931705f6514aa9--
 */