From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 28959 invoked by alias); 17 Jun 2011 18:38:16 -0000 Received: (qmail 28946 invoked by uid 22791); 17 Jun 2011 18:38:14 -0000 X-SWARE-Spam-Status: No, hits=0.3 required=5.0 tests=AWL,BAYES_50,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,RCVD_IN_DNSWL_NONE,TW_CL,TW_LZ,TW_TQ,TW_YQ X-Spam-Check-By: sourceware.org Received: from mo-p00-ob.rzone.de (HELO mo-p00-ob.rzone.de) (81.169.146.162) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 17 Jun 2011 18:37:53 +0000 X-RZG-AUTH: :LXoWVUeid/7A29J/hMvvT2k715jHQaJercGObUOFkj18odoYNahU4Q== X-RZG-CLASS-ID: mo00 Received: from [192.168.0.22] (business-188-111-022-002.static.arcor-ip.net [188.111.22.2]) by post.strato.de (mrclete mo17) (RZmta 25.18) with ESMTPA id w01c68n5HGSJg2 ; Fri, 17 Jun 2011 20:37:44 +0200 (MEST) Message-ID: <4DFB9EF8.2010109@gjlay.de> Date: Fri, 17 Jun 2011 19:14:00 -0000 From: Georg-Johann Lay User-Agent: Thunderbird 2.0.0.24 (X11/20100302) MIME-Version: 1.0 To: "Joseph S. Myers" CC: gcc-patches@gcc.gnu.org, Denis Chertykov , Anatoly Sokolov , "Eric B. Weddington" , Richard Henderson Subject: Re: [Patch, AVR]: QI builtins for parity, popcount, 1<< n References: <4DFA26FE.1000400@gjlay.de> <4DFB32BA.9090009@gjlay.de> <4DFB9DD9.9070700@gjlay.de> In-Reply-To: <4DFB9DD9.9070700@gjlay.de> Content-Type: multipart/mixed; boundary="------------080001050605040404080109" X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2011-06/txt/msg01356.txt.bz2 This is a multi-part message in MIME format. 
--------------080001050605040404080109 Content-Type: text/plain; charset=ISO-8859-1 Content-Transfer-Encoding: 7bit Content-length: 787 Georg-Johann Lay schrieb: > To come back to the original topic, here is a tentative patch for > better popcount and parity: > > * config/avr/t-avr (LIB1ASMFUNCS): Rename _loop_ffsqi2 to > _ffsqi2_nz. > * config/avr/libgcc.S: Ditto. Rename __loop_ffsqi2 to __ffsqi2_nz. > (__ctzsi2, __ctzhi2): Map zero to 255. > (__popcounthi2): Use r27 instead of r30. > (__popcountdi2): Use r30 instead of r27. > * config/avr/avr.md (parityhi2): New expander. > (popcounthi2): New expander. > (popcountsi2): New expander. > (*parityhi2.libgcc): New insn. > (*parityqihi2.libgcc): New insn. > (*popcounthi2.libgcc): New insn. > (*popcountsi2.libgcc): New insn. > (*popcountqi2.libgcc): New insn. > (*popcountqihi2.libgcc): New insn_and_split. > > Johann Oops, picked the wrong file. --------------080001050605040404080109 Content-Type: text/x-patch; name="popcount.diff" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="popcount.diff" Content-length: 5584 Index: config/avr/libgcc.S =================================================================== --- config/avr/libgcc.S (revision 175149) +++ config/avr/libgcc.S (working copy) @@ -935,7 +935,7 @@ DEFUN __ffssi2 brne 1f ret 1: mov r24, r22 - XJMP __loop_ffsqi2 + XJMP __ffsqi2_nz ENDF __ffssi2 #endif /* defined (L_ffssi2) */ @@ -946,7 +946,7 @@ ENDF __ffssi2 DEFUN __ffshi2 clr r26 cpse r24, __zero_reg__ -1: XJMP __loop_ffsqi2 +1: XJMP __ffsqi2_nz ldi r26, 8 or r24, r25 brne 1b @@ -954,20 +954,20 @@ DEFUN __ffshi2 ENDF __ffshi2 #endif /* defined (L_ffshi2) */ -#if defined (L_loop_ffsqi2) +#if defined (L_ffsqi2_nz) ;; Helper for ffshi2, ffssi2 ;; r25:r24 = r26 + zero_extend16 (ffs8(r24)) ;; r24 must be != 0 ;; clobbers: r26 -DEFUN __loop_ffsqi2 +DEFUN __ffsqi2_nz inc r26 lsr r24 - brcc __loop_ffsqi2 + brcc __ffsqi2_nz mov r24, r26 clr r25 ret -ENDF __loop_ffsqi2 -#endif /* defined 
(L_loop_ffsqi2) */ +ENDF __ffsqi2_nz +#endif /* defined (L_ffsqi2_nz) */ /********************************** @@ -977,12 +977,11 @@ ENDF __loop_ffsqi2 #if defined (L_ctzsi2) ;; count trailing zeros ;; r25:r24 = ctz32 (r25:r22) -;; ctz(0) = 32 +;; ctz(0) = 255 +;; Note that ctz(0) is undefined for GCC. DEFUN __ctzsi2 XCALL __ffssi2 dec r24 - sbrc r24, 7 - ldi r24, 32 ret ENDF __ctzsi2 #endif /* defined (L_ctzsi2) */ @@ -990,12 +989,11 @@ ENDF __ctzsi2 #if defined (L_ctzhi2) ;; count trailing zeros ;; r25:r24 = ctz16 (r25:r24) -;; ctz(0) = 16 +;; ctz(0) = 255 +;; Note that ctz(0) is undefined for GCC. DEFUN __ctzhi2 XCALL __ffshi2 dec r24 - sbrc r24, 7 - ldi r24, 16 ret ENDF __ctzhi2 #endif /* defined (L_ctzhi2) */ @@ -1129,13 +1127,13 @@ ENDF __parityqi2 #if defined (L_popcounthi2) ;; population count ;; r25:r24 = popcount16 (r25:r24) -;; clobbers: r30, __tmp_reg__ +;; clobbers: r27, __tmp_reg__ DEFUN __popcounthi2 XCALL __popcountqi2 - mov r30, r24 + mov r27, r24 mov r24, r25 XCALL __popcountqi2 - add r24, r30 + add r24, r27 clr r25 ret ENDF __popcounthi2 @@ -1144,7 +1142,7 @@ ENDF __popcounthi2 #if defined (L_popcountsi2) ;; population count ;; r25:r24 = popcount32 (r25:r22) -;; clobbers: r26, r30, __tmp_reg__ +;; clobbers: r26, r27, __tmp_reg__ DEFUN __popcountsi2 XCALL __popcounthi2 mov r26, r24 @@ -1162,13 +1160,13 @@ ENDF __popcountsi2 ;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__ DEFUN __popcountdi2 XCALL __popcountsi2 - mov r27, r24 + mov r30, r24 mov_l r22, r18 mov_h r23, r19 mov_l r24, r20 mov_h r25, r21 XCALL __popcountsi2 - add r24, r27 + add r24, r30 ret ENDF __popcountdi2 #endif /* defined (L_popcountdi2) */ Index: config/avr/avr.md =================================================================== --- config/avr/avr.md (revision 175149) +++ config/avr/avr.md (working copy) @@ -3321,6 +3321,92 @@ (define_insn "delay_cycles_4" [(set_attr "length" "9") (set_attr "cc" "clobber")]) +(define_expand "parityhi2" + [(set (reg:HI 24) + (match_operand:HI 1 
"register_operand" "")) + (set (reg:HI 24) + (parity:HI (reg:HI 24))) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_insn "*parityhi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:HI 24)))] + "" + "%~call __parityhi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*parityqihi2.libgcc" + [(set (reg:HI 24) + (parity:HI (reg:QI 24)))] + "" + "%~call __parityqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_expand "popcounthi2" + [(set (reg:HI 24) + (match_operand:HI 1 "register_operand" "")) + (parallel[(set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (clobber (reg:QI 27))]) + (set (match_operand:HI 0 "register_operand" "") + (reg:HI 24))] + "" + "") + +(define_expand "popcountsi2" + [(set (reg:SI 22) + (match_operand:SI 1 "register_operand" "")) + (parallel[(set (reg:HI 24) + (popcount:HI (reg:SI 22))) + (clobber (reg:HI 26))]) + (set (match_operand:SI 0 "register_operand" "") + (zero_extend:SI (reg:HI 24)))] + "" + "") + +(define_insn "*popcounthi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:HI 24))) + (clobber (reg:QI 27))] + "" + "%~call __popcounthi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountsi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:SI 22))) + (clobber (reg:HI 26))] + "" + "%~call __popcountsi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn "*popcountqi2.libgcc" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24)))] + "" + "%~call __popcountqi2" + [(set_attr "type" "xcall") + (set_attr "cc" "clobber")]) + +(define_insn_and_split "*popcountqihi2.libgcc" + [(set (reg:HI 24) + (popcount:HI (reg:QI 24)))] + "" + "#" + "" + [(set (reg:QI 24) + (popcount:QI (reg:QI 24))) + (set (reg:QI 25) + (const_int 0))] + "") + ;; CPU instructions ;; NOP taking 1 or 2 Ticks Index: config/avr/t-avr =================================================================== --- config/avr/t-avr (revision 175149) +++ 
config/avr/t-avr (working copy) @@ -53,7 +53,7 @@ LIB1ASMFUNCS = \ _dtors \ _ffssi2 \ _ffshi2 \ - _loop_ffsqi2 \ + _ffsqi2_nz \ _ctzsi2 \ _ctzhi2 \ _clzdi2 \ --------------080001050605040404080109--