From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 17845 invoked by alias); 16 May 2007 20:25:17 -0000 Received: (qmail 17799 invoked by uid 22791); 16 May 2007 20:25:14 -0000 X-Spam-Check-By: sourceware.org Received: from ug-out-1314.google.com (HELO ug-out-1314.google.com) (66.249.92.175) by sourceware.org (qpsmtpd/0.31) with ESMTP; Wed, 16 May 2007 20:25:08 +0000 Received: by ug-out-1314.google.com with SMTP id s2so158899uge for ; Wed, 16 May 2007 13:25:03 -0700 (PDT) Received: by 10.82.126.5 with SMTP id y5mr5872592buc.1179347102943; Wed, 16 May 2007 13:25:02 -0700 (PDT) Received: from ?194.249.15.34? ( [194.249.15.34]) by mx.google.com with ESMTP id j9sm62936mue.2007.05.16.13.24.58; Wed, 16 May 2007 13:25:01 -0700 (PDT) Message-ID: <464B6834.1050600@gmail.com> Date: Wed, 16 May 2007 20:25:00 -0000 From: Uros Bizjak User-Agent: Thunderbird 1.5.0.7 (X11/20061008) MIME-Version: 1.0 To: GCC Patches Subject: [PATCH, x86_64]: Provide longlong.h definitions for 128bit operations Content-Type: multipart/mixed; boundary="------------060507070805010709050201" Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2007-05/txt/msg01084.txt.bz2 This is a multi-part message in MIME format. --------------060507070805010709050201 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit Content-length: 736 Hello! This patch adds 128bit operations for x86_64 to longlong.h to speed up TImode and TFmode arithmetic. The patch also redefines i386's definitions of count_trailing/leading_zeros from asm to __builtin_ctz/__builtin_clz builtins, as provided by i386 backend. Patch was bootstrapped on x86_64-pc-linux-gnu, regression tested for all default languages with and without -m32. 
2007-05-16 Uros Bizjak * longlong.h (__x86_64__): Add definitions for add_ssaaaa, sub_ddmmss, umul_ppmm, udiv_qrnnd, count_leading_zeros and count_trailing_zeros. (__i386__): Implement count_leading_zeros using __builtin_clz(). Implement count_trailing_zeros using __builtin_ctz(). Uros. --------------060507070805010709050201 Content-Type: text/x-patch; name="x86_64-longlong.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="x86_64-longlong.diff" Content-length: 2243 Index: longlong.h =================================================================== --- longlong.h (revision 124771) +++ longlong.h (working copy) @@ -341,19 +341,48 @@ : "0" ((USItype) (n0)), \ "1" ((USItype) (n1)), \ "rm" ((USItype) (dv))) -#define count_leading_zeros(count, x) \ - do { \ - USItype __cbtmp; \ - __asm__ ("bsrl %1,%0" \ - : "=r" (__cbtmp) : "rm" ((USItype) (x))); \ - (count) = __cbtmp ^ 31; \ - } while (0) -#define count_trailing_zeros(count, x) \ - __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))) +#define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) +#define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) #define UMUL_TIME 40 #define UDIV_TIME 40 #endif /* 80x86 */ +#if (defined (__x86_64__) || defined (__i386__)) && W_TYPE_SIZE == 64 +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addq %5,%1\n\tadcq %3,%0" \ + : "=r" ((UDItype) (sh)), \ + "=&r" ((UDItype) (sl)) \ + : "%0" ((UDItype) (ah)), \ + "rem" ((UDItype) (bh)), \ + "%1" ((UDItype) (al)), \ + "rem" ((UDItype) (bl))) +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \ + __asm__ ("subq %5,%1\n\tsbbq %3,%0" \ + : "=r" ((UDItype) (sh)), \ + "=&r" ((UDItype) (sl)) \ + : "0" ((UDItype) (ah)), \ + "rem" ((UDItype) (bh)), \ + "1" ((UDItype) (al)), \ + "rem" ((UDItype) (bl))) +#define umul_ppmm(w1, w0, u, v) \ + __asm__ ("mulq %3" \ + : "=a" ((UDItype) (w0)), \ + "=d" ((UDItype) (w1)) \ + : "%0" ((UDItype) (u)), \ + "rm" ((UDItype) (v))) +#define udiv_qrnnd(q, r, n1, 
n0, dv) \ + __asm__ ("divq %4" \ + : "=a" ((UDItype) (q)), \ + "=d" ((UDItype) (r)) \ + : "0" ((UDItype) (n0)), \ + "1" ((UDItype) (n1)), \ + "rm" ((UDItype) (dv))) +#define count_leading_zeros(count, x) ((count) = __builtin_clzl (x)) +#define count_trailing_zeros(count, x) ((count) = __builtin_ctzl (x)) +#define UMUL_TIME 40 +#define UDIV_TIME 40 +#endif /* x86_64 */ + #if defined (__i960__) && W_TYPE_SIZE == 32 #define umul_ppmm(w1, w0, u, v) \ ({union {UDItype __ll; \ --------------060507070805010709050201--