public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Patch,AVR]: PR49313
@ 2011-07-27 16:29 Georg-Johann Lay
  2011-07-27 16:48 ` Richard Henderson
  0 siblings, 1 reply; 3+ messages in thread
From: Georg-Johann Lay @ 2011-07-27 16:29 UTC (permalink / raw)
  To: gcc-patches
  Cc: Anatoly Sokolov, Denis Chertykov, Eric Weddington, Richard Henderson

[-- Attachment #1: Type: text/plain, Size: 1867 bytes --]

This patch is to finalize the work on PR49313, i.e. better libgcc
implementation of some functions like bswap, counting zeros,
parity and popcount.

These functions are already implemented in libgcc.

This patch now provides a better integration of these functions:
the calls are no longer emitted as ordinary black-box calls by optabs;
instead there are insns that describe the exact register usage of
the functions, which are represented as implicit library calls.

This is advantageous because some call-clobbered registers are not
touched and there are more leaf-functions.

Some libgcc functions have minor changes to reduce register
footprint.

Besides that, copysignsf3 is implemented which is easy on avr.

Ok to commit?

Johann


	PR target/49313
	* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
	(__ctzsi2): Result for 0 may be undefined.
	(__ctzhi2): Result for 0 may be undefined.
	(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
	(__popcountsi2): Ditto. And don't clobber r26.
	(__popcountdi2): Ditto. And don't clobber r27.
	* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
	(parityhi2): New expand.
	(paritysi2): New expand.
	(popcounthi2): New expand.
	(popcountsi2): New expand.
	(clzhi2): New expand.
	(clzsi2): New expand.
	(ctzhi2): New expand.
	(ctzsi2): New expand.
	(ffshi2): New expand.
	(ffssi2): New expand.
	(copysignsf3): New insn.
	(bswapsi2): New expand.
	(*parityhi2.libgcc): New insn.
	(*parityqihi2.libgcc): New insn.
	(*paritysihi2.libgcc): New insn.
	(*popcounthi2.libgcc): New insn.
	(*popcountsi2.libgcc): New insn.
	(*popcountqi2.libgcc): New insn.
	(*popcountqihi2.libgcc): New insn-and-split.
	(*clzhi2.libgcc): New insn.
	(*clzsihi2.libgcc): New insn.
	(*ctzhi2.libgcc): New insn.
	(*ctzsihi2.libgcc): New insn.
	(*ffshi2.libgcc): New insn.
	(*ffssihi2.libgcc): New insn.
	(*bswapsi2.libgcc): New insn.

[-- Attachment #2: builtin-libgcc-insns.diff --]
[-- Type: text/x-patch, Size: 9869 bytes --]

Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S	(revision 176818)
+++ config/avr/libgcc.S	(working copy)
@@ -1061,9 +1061,15 @@ ENDF __ffssi2
 ;; clobbers: r26
 DEFUN __ffshi2
     clr  r26
+#ifdef __AVR_HAVE_JMP_CALL__
+    ;; Some cores have problems skipping a 2-word instruction
+    tst  r24
+    breq 2f
+#else
     cpse r24, __zero_reg__
+#endif /* __AVR_HAVE_JMP_CALL__ */
 1:  XJMP __loop_ffsqi2
-    ldi  r26, 8
+2:  ldi  r26, 8
     or   r24, r25
     brne 1b
     ret
@@ -1093,12 +1099,12 @@ ENDF __loop_ffsqi2
 #if defined (L_ctzsi2)
 ;; count trailing zeros
 ;; r25:r24 = ctz32 (r25:r22)
-;; ctz(0) = 32
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
 DEFUN __ctzsi2
     XCALL __ffssi2
     dec  r24
-    sbrc r24, 7
-    ldi  r24, 32
     ret
 ENDF __ctzsi2
 #endif /* defined (L_ctzsi2) */
@@ -1106,12 +1112,12 @@ ENDF __ctzsi2
 #if defined (L_ctzhi2)
 ;; count trailing zeros
 ;; r25:r24 = ctz16 (r25:r24)
-;; ctz(0) = 16
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
 DEFUN __ctzhi2
     XCALL __ffshi2
     dec  r24
-    sbrc r24, 7
-    ldi  r24, 16
     ret
 ENDF __ctzhi2
 #endif /* defined (L_ctzhi2) */
@@ -1245,47 +1251,50 @@ ENDF __parityqi2
 #if defined (L_popcounthi2)
 ;; population count
 ;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: r30, __tmp_reg__
+;; clobbers: __tmp_reg__
 DEFUN __popcounthi2
     XCALL __popcountqi2
-    mov  r30, r24
+    push r24
     mov  r24, r25
     XCALL __popcountqi2
-    add  r24, r30
     clr  r25
-    ret
+    ;; FALLTHRU
 ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+    pop   __tmp_reg__
+    add   r24, __tmp_reg__
+    ret
+ENDF __popcounthi2_tail
 #endif /* defined (L_popcounthi2) */
 
 #if defined (L_popcountsi2)
 ;; population count
 ;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: r26, r30, __tmp_reg__
+;; clobbers: __tmp_reg__
 DEFUN __popcountsi2
     XCALL __popcounthi2
-    mov   r26, r24
+    push  r24
     mov_l r24, r22
     mov_h r25, r23
     XCALL __popcounthi2
-    add   r24, r26
-    ret
+    XJMP  __popcounthi2_tail
 ENDF __popcountsi2
 #endif /* defined (L_popcountsi2) */
 
 #if defined (L_popcountdi2)
 ;; population count
 ;; r25:r24 = popcount64 (r25:r18)
-;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__
+;; clobbers: r22, r23, __tmp_reg__
 DEFUN __popcountdi2
     XCALL __popcountsi2
-    mov   r27, r24
+    push  r24
     mov_l r22, r18
     mov_h r23, r19
     mov_l r24, r20
     mov_h r25, r21
     XCALL __popcountsi2
-    add   r24, r27
-    ret
+    XJMP  __popcounthi2_tail
 ENDF __popcountdi2
 #endif /* defined (L_popcountdi2) */
 
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md	(revision 176818)
+++ config/avr/avr.md	(working copy)
@@ -55,6 +55,7 @@ (define_c_enum "unspec"
    UNSPEC_FMUL
    UNSPEC_FMULS
    UNSPEC_FMULSU
+   UNSPEC_COPYSIGN
    ])
 
 (define_c_enum "unspecv"
@@ -3680,6 +3681,275 @@ (define_insn "delay_cycles_4"
   [(set_attr "length" "9")
    (set_attr "cc" "clobber")])
 
+
+;; Parity
+
+(define_expand "parityhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (parity:HI (reg:HI 24)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "paritysi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (parity:HI (reg:SI 22)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*parityhi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:HI 24)))]
+  ""
+  "%~call __parityhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*parityqihi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:QI 24)))]
+  ""
+  "%~call __parityqi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*paritysihi2.libgcc"
+  [(set (reg:HI 24)
+        (parity:HI (reg:SI 22)))]
+  ""
+  "%~call __paritysi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+
+;; Popcount
+
+(define_expand "popcounthi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (popcount:HI (reg:HI 24)))
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "popcountsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (set (reg:HI 24)
+        (popcount:HI (reg:SI 22)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*popcounthi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:HI 24)))]
+  ""
+  "%~call __popcounthi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*popcountsi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:SI 22)))]
+  ""
+  "%~call __popcountsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*popcountqi2.libgcc"
+  [(set (reg:QI 24)
+        (popcount:QI (reg:QI 24)))]
+  ""
+  "%~call __popcountqi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*popcountqihi2.libgcc"
+  [(set (reg:HI 24)
+        (popcount:HI (reg:QI 24)))]
+  ""
+  "#"
+  ""
+  [(set (reg:QI 24)
+        (popcount:QI (reg:QI 24)))
+   (set (reg:QI 25)
+        (const_int 0))]
+  "")
+
+;; Count Leading Zeros
+
+(define_expand "clzhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (clz:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "clzsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (clz:HI (reg:SI 22)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*clzhi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (clz:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __clzhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*clzsihi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (clz:HI (reg:SI 22)))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __clzsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Count Trailing Zeros
+
+(define_expand "ctzhi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ctz:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "ctzsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ctz:HI (reg:SI 22)))
+              (clobber (reg:QI 22))
+              (clobber (reg:QI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*ctzhi2.libgcc"
+  [(set (reg:HI 24)
+        (ctz:HI (reg:HI 24)))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __ctzhi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*ctzsihi2.libgcc"
+  [(set (reg:HI 24)
+        (ctz:HI (reg:SI 22)))
+   (clobber (reg:QI 22))
+   (clobber (reg:QI 26))]
+  ""
+  "%~call __ctzsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Find First Set
+
+(define_expand "ffshi2"
+  [(set (reg:HI 24)
+        (match_operand:HI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])
+   (set (match_operand:HI 0 "register_operand" "")
+        (reg:HI 24))]
+  ""
+  "")
+
+(define_expand "ffssi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:SI 22)))
+              (clobber (reg:QI 22))
+              (clobber (reg:QI 26))])
+   (set (match_operand:SI 0 "register_operand" "")
+        (zero_extend:SI (reg:HI 24)))]
+  ""
+  "")
+
+(define_insn "*ffshi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:HI 24)))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __ffshi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+(define_insn "*ffssihi2.libgcc"
+  [(parallel [(set (reg:HI 24)
+                   (ffs:HI (reg:SI 22)))
+              (clobber (reg:QI 22))
+              (clobber (reg:QI 26))])]
+  ""
+  "%~call __ffssi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+;; Copysign
+
+(define_insn "copysignsf3"
+  [(set (match_operand:SF 0 "register_operand"             "=r")
+        (unspec:SF [(match_operand:SF 1 "register_operand"  "0")
+                    (match_operand:SF 2 "register_operand"  "r")]
+                   UNSPEC_COPYSIGN))]
+  ""
+  "bst %D2,7\;bld %D0,7"
+  [(set_attr "length" "2")
+   (set_attr "cc" "none")])
+  
+;; Swap Bytes (change byte-endianness)
+
+(define_expand "bswapsi2"
+  [(set (reg:SI 22)
+        (match_operand:SI 1 "register_operand" ""))
+   (set (reg:SI 22)
+        (bswap:SI (reg:SI 22)))
+   (set (match_operand:SI 0 "register_operand" "")
+        (reg:SI 22))]
+  ""
+  "")
+
+(define_insn "*bswapsi2.libgcc"
+  [(set (reg:SI 22)
+        (bswap:SI (reg:SI 22)))]
+  ""
+  "%~call __bswapsi2"
+  [(set_attr "type" "xcall")
+   (set_attr "cc" "clobber")])
+
+
 ;; CPU instructions
 
 ;; NOP taking 1 or 2 Ticks 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Patch,AVR]: PR49313
  2011-07-27 16:29 [Patch,AVR]: PR49313 Georg-Johann Lay
@ 2011-07-27 16:48 ` Richard Henderson
  2011-07-29 11:03   ` Georg-Johann Lay
  0 siblings, 1 reply; 3+ messages in thread
From: Richard Henderson @ 2011-07-27 16:48 UTC (permalink / raw)
  To: Georg-Johann Lay
  Cc: gcc-patches, Anatoly Sokolov, Denis Chertykov, Eric Weddington

On 07/27/2011 09:12 AM, Georg-Johann Lay wrote:
> 	PR target/49313
> 	* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
> 	(__ctzsi2): Result for 0 may be undefined.
> 	(__ctzhi2): Result for 0 may be undefined.
> 	(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
> 	(__popcountsi2): Ditto. And don't clobber r26.
> 	(__popcountdi2): Ditto. And don't clobber r27.
> 	* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
> 	(parityhi2): New expand.
> 	(paritysi2): New expand.
> 	(popcounthi2): New expand.
> 	(popcountsi2): New expand.
> 	(clzhi2): New expand.
> 	(clzsi2): New expand.
> 	(ctzhi2): New expand.
> 	(ctzsi2): New expand.
> 	(ffshi2): New expand.
> 	(ffssi2): New expand.
> 	(copysignsf2): New insn.
> 	(bswapsi2): New expand.
> 	(*parityhi2.libgcc): New insn.
> 	(*parityqihi2.libgcc): New insn.
> 	(*paritysihi2.libgcc): New insn.
> 	(*popcounthi2.libgcc): New insn.
> 	(*popcountsi2.libgcc): New insn.
> 	(*popcountqi2.libgcc): New insn.
> 	(*popcountqihi2.libgcc): New insn-and-split.
> 	(*clzhi2.libgcc): New insn.
> 	(*clzsihi2.libgcc): New insn.
> 	(*ctzhi2.libgcc): New insn.
> 	(*ctzsihi2.libgcc): New insn.
> 	(*ffshi2.libgcc): New insn.
> 	(*ffssihi2.libgcc): New insn.
> 	(*bswapsi2.libgcc): New insn.

Looks good.


r~

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [Patch,AVR]: PR49313
  2011-07-27 16:48 ` Richard Henderson
@ 2011-07-29 11:03   ` Georg-Johann Lay
  0 siblings, 0 replies; 3+ messages in thread
From: Georg-Johann Lay @ 2011-07-29 11:03 UTC (permalink / raw)
  To: Richard Henderson
  Cc: gcc-patches, Anatoly Sokolov, Denis Chertykov, Eric Weddington

http://gcc.gnu.org/ml/gcc-patches/2011-07/msg02424.html

Richard Henderson wrote:
> On 07/27/2011 09:12 AM, Georg-Johann Lay wrote:
>> 	PR target/49313
>> 	* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
>> 	(__ctzsi2): Result for 0 may be undefined.
>> 	(__ctzhi2): Result for 0 may be undefined.
>> 	(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
>> 	(__popcountsi2): Ditto. And don't clobber r26.
>> 	(__popcountdi2): Ditto. And don't clobber r27.
>> 	* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
>> 	(parityhi2): New expand.
>> 	(paritysi2): New expand.
>> 	(popcounthi2): New expand.
>> 	(popcountsi2): New expand.
>> 	(clzhi2): New expand.
>> 	(clzsi2): New expand.
>> 	(ctzhi2): New expand.
>> 	(ctzsi2): New expand.
>> 	(ffshi2): New expand.
>> 	(ffssi2): New expand.
>> 	(copysignsf2): New insn.
>> 	(bswapsi2): New expand.
>> 	(*parityhi2.libgcc): New insn.
>> 	(*parityqihi2.libgcc): New insn.
>> 	(*paritysihi2.libgcc): New insn.
>> 	(*popcounthi2.libgcc): New insn.
>> 	(*popcountsi2.libgcc): New insn.
>> 	(*popcountqi2.libgcc): New insn.
>> 	(*popcountqihi2.libgcc): New insn-and-split.
>> 	(*clzhi2.libgcc): New insn.
>> 	(*clzsihi2.libgcc): New insn.
>> 	(*ctzhi2.libgcc): New insn.
>> 	(*ctzsihi2.libgcc): New insn.
>> 	(*ffshi2.libgcc): New insn.
>> 	(*ffssihi2.libgcc): New insn.
>> 	(*bswapsi2.libgcc): New insn.
> 
> Looks good.
> 
> 
> r~

http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=176920

Committed with the following changes imposed by
   http://gcc.gnu.org/viewcvs?view=revision&revision=176862
i.e. don't generate zero_extends of hard registers, and replace

(define_expand ...
...
   (set (match_operand:SI 0 "register_operand" "")
        (zero_extend:SI (reg:HI 24)))]
  ""
  "")

with

(define_expand ...
...
   (set (match_dup 2)
        (reg:HI 24))
   (set (match_operand:SI 0 "register_operand" "")
        (zero_extend:SI (match_dup 2)))]
  ""
  {
    operands[2] = gen_reg_rtx (HImode);
  })

Replacing explicit hard registers in expanders/splits with
insns that have corresponding hard register constraints leads
to extraordinarily bad code, see
   http://gcc.gnu.org/ml/gcc-patches/2011-07/msg02412.html
and the following discussion.  Unfortunately, the relevant post
has not shown up in the mail archives yet (for 24 hours now), so
I cannot link it, but Richard evidently received it, as he replied
to it.

Passed without regressions.

Johann

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-07-29  9:56 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-27 16:29 [Patch,AVR]: PR49313 Georg-Johann Lay
2011-07-27 16:48 ` Richard Henderson
2011-07-29 11:03   ` Georg-Johann Lay

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).