* [Patch,AVR]: PR49313
@ 2011-07-27 16:29 Georg-Johann Lay
2011-07-27 16:48 ` Richard Henderson
0 siblings, 1 reply; 3+ messages in thread
From: Georg-Johann Lay @ 2011-07-27 16:29 UTC (permalink / raw)
To: gcc-patches
Cc: Anatoly Sokolov, Denis Chertykov, Eric Weddington, Richard Henderson
[-- Attachment #1: Type: text/plain, Size: 1867 bytes --]
This patch is to finalize the work on PR49313, i.e. better libgcc
implementation of some functions like bswap, counting zeros,
parity and popcount.
These functions are already implemented in libgcc.
This patch now provides a better integration of these functions:
the calls are no longer emitted as ordinary black box calls by optabs,
instead there are insns to describe the exact register usage of
the functions which are represented as implicit library calls.
This is advantageous because some call-clobbered registers are not
touched and there are more leaf-functions.
Some libgcc functions have minor changes to reduce register
footprint.
Besides that, copysignsf3 is implemented, which is easy on AVR.
Ok to commit?
Johann
PR target/49313
* config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
(__ctzsi2): Result for 0 may be undefined.
(__ctzhi2): Result for 0 may be undefined.
(__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
(__popcountsi2): Ditto. And don't clobber r26.
(__popcountdi2): Ditto. And don't clobber r27.
* config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
(parityhi2): New expand.
(paritysi2): New expand.
(popcounthi2): New expand.
(popcountsi2): New expand.
(clzhi2): New expand.
(clzsi2): New expand.
(ctzhi2): New expand.
(ctzsi2): New expand.
(ffshi2): New expand.
(ffssi2): New expand.
(copysignsf3): New insn.
(bswapsi2): New expand.
(*parityhi2.libgcc): New insn.
(*parityqihi2.libgcc): New insn.
(*paritysihi2.libgcc): New insn.
(*popcounthi2.libgcc): New insn.
(*popcountsi2.libgcc): New insn.
(*popcountqi2.libgcc): New insn.
(*popcountqihi2.libgcc): New insn-and-split.
(*clzhi2.libgcc): New insn.
(*clzsihi2.libgcc): New insn.
(*ctzhi2.libgcc): New insn.
(*ctzsihi2.libgcc): New insn.
(*ffshi2.libgcc): New insn.
(*ffssihi2.libgcc): New insn.
(*bswapsi2.libgcc): New insn.
[-- Attachment #2: builtin-libgcc-insns.diff --]
[-- Type: text/x-patch, Size: 9869 bytes --]
Index: config/avr/libgcc.S
===================================================================
--- config/avr/libgcc.S (revision 176818)
+++ config/avr/libgcc.S (working copy)
@@ -1061,9 +1061,15 @@ ENDF __ffssi2
;; clobbers: r26
DEFUN __ffshi2
clr r26
+#ifdef __AVR_HAVE_JMP_CALL__
+ ;; Some cores have problems skipping a 2-word instruction
+ tst r24
+ breq 2f
+#else
cpse r24, __zero_reg__
+#endif /* __AVR_HAVE_JMP_CALL__ */
1: XJMP __loop_ffsqi2
- ldi r26, 8
+2: ldi r26, 8
or r24, r25
brne 1b
ret
@@ -1093,12 +1099,12 @@ ENDF __loop_ffsqi2
#if defined (L_ctzsi2)
;; count trailing zeros
;; r25:r24 = ctz32 (r25:r22)
-;; ctz(0) = 32
+;; clobbers: r26, r22
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
DEFUN __ctzsi2
XCALL __ffssi2
dec r24
- sbrc r24, 7
- ldi r24, 32
ret
ENDF __ctzsi2
#endif /* defined (L_ctzsi2) */
@@ -1106,12 +1112,12 @@ ENDF __ctzsi2
#if defined (L_ctzhi2)
;; count trailing zeros
;; r25:r24 = ctz16 (r25:r24)
-;; ctz(0) = 16
+;; clobbers: r26
+;; ctz(0) = 255
+;; Note that ctz(0) is undefined for GCC
DEFUN __ctzhi2
XCALL __ffshi2
dec r24
- sbrc r24, 7
- ldi r24, 16
ret
ENDF __ctzhi2
#endif /* defined (L_ctzhi2) */
@@ -1245,47 +1251,50 @@ ENDF __parityqi2
#if defined (L_popcounthi2)
;; population count
;; r25:r24 = popcount16 (r25:r24)
-;; clobbers: r30, __tmp_reg__
+;; clobbers: __tmp_reg__
DEFUN __popcounthi2
XCALL __popcountqi2
- mov r30, r24
+ push r24
mov r24, r25
XCALL __popcountqi2
- add r24, r30
clr r25
- ret
+ ;; FALLTHRU
ENDF __popcounthi2
+
+DEFUN __popcounthi2_tail
+ pop __tmp_reg__
+ add r24, __tmp_reg__
+ ret
+ENDF __popcounthi2_tail
#endif /* defined (L_popcounthi2) */
#if defined (L_popcountsi2)
;; population count
;; r25:r24 = popcount32 (r25:r22)
-;; clobbers: r26, r30, __tmp_reg__
+;; clobbers: __tmp_reg__
DEFUN __popcountsi2
XCALL __popcounthi2
- mov r26, r24
+ push r24
mov_l r24, r22
mov_h r25, r23
XCALL __popcounthi2
- add r24, r26
- ret
+ XJMP __popcounthi2_tail
ENDF __popcountsi2
#endif /* defined (L_popcountsi2) */
#if defined (L_popcountdi2)
;; population count
;; r25:r24 = popcount64 (r25:r18)
-;; clobbers: r22, r23, r26, r27, r30, __tmp_reg__
+;; clobbers: r22, r23, __tmp_reg__
DEFUN __popcountdi2
XCALL __popcountsi2
- mov r27, r24
+ push r24
mov_l r22, r18
mov_h r23, r19
mov_l r24, r20
mov_h r25, r21
XCALL __popcountsi2
- add r24, r27
- ret
+ XJMP __popcounthi2_tail
ENDF __popcountdi2
#endif /* defined (L_popcountdi2) */
Index: config/avr/avr.md
===================================================================
--- config/avr/avr.md (revision 176818)
+++ config/avr/avr.md (working copy)
@@ -55,6 +55,7 @@ (define_c_enum "unspec"
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
+ UNSPEC_COPYSIGN
])
(define_c_enum "unspecv"
@@ -3680,6 +3681,275 @@ (define_insn "delay_cycles_4"
[(set_attr "length" "9")
(set_attr "cc" "clobber")])
+
+;; Parity
+
+(define_expand "parityhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (parity:HI (reg:HI 24)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "paritysi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (parity:HI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*parityhi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:HI 24)))]
+ ""
+ "%~call __parityhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*parityqihi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:QI 24)))]
+ ""
+ "%~call __parityqi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*paritysihi2.libgcc"
+ [(set (reg:HI 24)
+ (parity:HI (reg:SI 22)))]
+ ""
+ "%~call __paritysi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+
+;; Popcount
+
+(define_expand "popcounthi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (popcount:HI (reg:HI 24)))
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "popcountsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:HI 24)
+ (popcount:HI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*popcounthi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:HI 24)))]
+ ""
+ "%~call __popcounthi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*popcountsi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:SI 22)))]
+ ""
+ "%~call __popcountsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*popcountqi2.libgcc"
+ [(set (reg:QI 24)
+ (popcount:QI (reg:QI 24)))]
+ ""
+ "%~call __popcountqi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn_and_split "*popcountqihi2.libgcc"
+ [(set (reg:HI 24)
+ (popcount:HI (reg:QI 24)))]
+ ""
+ "#"
+ ""
+ [(set (reg:QI 24)
+ (popcount:QI (reg:QI 24)))
+ (set (reg:QI 25)
+ (const_int 0))]
+ "")
+
+;; Count Leading Zeros
+
+(define_expand "clzhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (clz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "clzsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (clz:HI (reg:SI 22)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*clzhi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (clz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __clzhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*clzsihi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (clz:HI (reg:SI 22)))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __clzsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Count Trailing Zeros
+
+(define_expand "ctzhi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ctz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "ctzsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ctz:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*ctzhi2.libgcc"
+ [(set (reg:HI 24)
+ (ctz:HI (reg:HI 24)))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ctzhi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*ctzsihi2.libgcc"
+ [(set (reg:HI 24)
+ (ctz:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))]
+ ""
+ "%~call __ctzsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Find First Set
+
+(define_expand "ffshi2"
+ [(set (reg:HI 24)
+ (match_operand:HI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ffs:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])
+ (set (match_operand:HI 0 "register_operand" "")
+ (reg:HI 24))]
+ ""
+ "")
+
+(define_expand "ffssi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (parallel [(set (reg:HI 24)
+ (ffs:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])
+ (set (match_operand:SI 0 "register_operand" "")
+ (zero_extend:SI (reg:HI 24)))]
+ ""
+ "")
+
+(define_insn "*ffshi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (ffs:HI (reg:HI 24)))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __ffshi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+(define_insn "*ffssihi2.libgcc"
+ [(parallel [(set (reg:HI 24)
+ (ffs:HI (reg:SI 22)))
+ (clobber (reg:QI 22))
+ (clobber (reg:QI 26))])]
+ ""
+ "%~call __ffssi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+;; Copysign
+
+(define_insn "copysignsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "register_operand" "r")]
+ UNSPEC_COPYSIGN))]
+ ""
+ "bst %D2,7\;bld %D0,7"
+ [(set_attr "length" "2")
+ (set_attr "cc" "none")])
+
+;; Swap Bytes (change byte-endianness)
+
+(define_expand "bswapsi2"
+ [(set (reg:SI 22)
+ (match_operand:SI 1 "register_operand" ""))
+ (set (reg:SI 22)
+ (bswap:SI (reg:SI 22)))
+ (set (match_operand:SI 0 "register_operand" "")
+ (reg:SI 22))]
+ ""
+ "")
+
+(define_insn "*bswapsi2.libgcc"
+ [(set (reg:SI 22)
+ (bswap:SI (reg:SI 22)))]
+ ""
+ "%~call __bswapsi2"
+ [(set_attr "type" "xcall")
+ (set_attr "cc" "clobber")])
+
+
;; CPU instructions
;; NOP taking 1 or 2 Ticks
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Patch,AVR]: PR49313
2011-07-27 16:29 [Patch,AVR]: PR49313 Georg-Johann Lay
@ 2011-07-27 16:48 ` Richard Henderson
2011-07-29 11:03 ` Georg-Johann Lay
0 siblings, 1 reply; 3+ messages in thread
From: Richard Henderson @ 2011-07-27 16:48 UTC (permalink / raw)
To: Georg-Johann Lay
Cc: gcc-patches, Anatoly Sokolov, Denis Chertykov, Eric Weddington
On 07/27/2011 09:12 AM, Georg-Johann Lay wrote:
> PR target/49313
> * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
> (__ctzsi2): Result for 0 may be undefined.
> (__ctzhi2): Result for 0 may be undefined.
> (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
> (__popcountsi2): Ditto. And don't clobber r26.
> (__popcountdi2): Ditto. And don't clobber r27.
> * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
> (parityhi2): New expand.
> (paritysi2): New expand.
> (popcounthi2): New expand.
> (popcountsi2): New expand.
> (clzhi2): New expand.
> (clzsi2): New expand.
> (ctzhi2): New expand.
> (ctzsi2): New expand.
> (ffshi2): New expand.
> (ffssi2): New expand.
> (copysignsf2): New insn.
> (bswapsi2): New expand.
> (*parityhi2.libgcc): New insn.
> (*parityqihi2.libgcc): New insn.
> (*paritysihi2.libgcc): New insn.
> (*popcounthi2.libgcc): New insn.
> (*popcountsi2.libgcc): New insn.
> (*popcountqi2.libgcc): New insn.
> (*popcountqihi2.libgcc): New insn-and-split.
> (*clzhi2.libgcc): New insn.
> (*clzsihi2.libgcc): New insn.
> (*ctzhi2.libgcc): New insn.
> (*ctzsihi2.libgcc): New insn.
> (*ffshi2.libgcc): New insn.
> (*ffssihi2.libgcc): New insn.
> (*bswapsi2.libgcc): New insn.
Looks good.
r~
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [Patch,AVR]: PR49313
2011-07-27 16:48 ` Richard Henderson
@ 2011-07-29 11:03 ` Georg-Johann Lay
0 siblings, 0 replies; 3+ messages in thread
From: Georg-Johann Lay @ 2011-07-29 11:03 UTC (permalink / raw)
To: Richard Henderson
Cc: gcc-patches, Anatoly Sokolov, Denis Chertykov, Eric Weddington
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg02424.html
Richard Henderson wrote:
> On 07/27/2011 09:12 AM, Georg-Johann Lay wrote:
>> PR target/49313
>> * config/avr/libgcc.S (__ffshi2): Don't skip 2-word instruction.
>> (__ctzsi2): Result for 0 may be undefined.
>> (__ctzhi2): Result for 0 may be undefined.
>> (__popcounthi2): Don't clobber r30. Use __popcounthi2_tail.
>> (__popcountsi2): Ditto. And don't clobber r26.
>> (__popcountdi2): Ditto. And don't clobber r27.
>> * config/avr/avr.md (UNSPEC_COPYSIGN): New c_enum.
>> (parityhi2): New expand.
>> (paritysi2): New expand.
>> (popcounthi2): New expand.
>> (popcountsi2): New expand.
>> (clzhi2): New expand.
>> (clzsi2): New expand.
>> (ctzhi2): New expand.
>> (ctzsi2): New expand.
>> (ffshi2): New expand.
>> (ffssi2): New expand.
>> (copysignsf2): New insn.
>> (bswapsi2): New expand.
>> (*parityhi2.libgcc): New insn.
>> (*parityqihi2.libgcc): New insn.
>> (*paritysihi2.libgcc): New insn.
>> (*popcounthi2.libgcc): New insn.
>> (*popcountsi2.libgcc): New insn.
>> (*popcountqi2.libgcc): New insn.
>> (*popcountqihi2.libgcc): New insn-and-split.
>> (*clzhi2.libgcc): New insn.
>> (*clzsihi2.libgcc): New insn.
>> (*ctzhi2.libgcc): New insn.
>> (*ctzsihi2.libgcc): New insn.
>> (*ffshi2.libgcc): New insn.
>> (*ffssihi2.libgcc): New insn.
>> (*bswapsi2.libgcc): New insn.
>
> Looks good.
>
>
> r~
http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=176920
Committed with the following changes imposed by
http://gcc.gnu.org/viewcvs?view=revision&revision=176862
i.e. don't generate zero_extends with hard registers and replace
(define_expand ...
...
(set (match_operand:SI 0 "register_operand" "")
(zero_extend:SI (reg:HI 24)))]
""
"")
with
(define_expand ...
...
(set (match_dup 2)
(reg:HI 24))
(set (match_operand:SI 0 "register_operand" "")
(zero_extend:SI (match_dup 2)))]
""
{
operands[2] = gen_reg_rtx (HImode);
})
Replacing explicit hard registers in expanders/splits with
insns that have corresponding hard register constraints leads
to extraordinarily bad code, see
http://gcc.gnu.org/ml/gcc-patches/2011-07/msg02412.html
and the following discussion. Unfortunately, the relevant post
hasn't shown up in the mail archives yet (for 24 hours now), so
I cannot link it, but obviously Richard received it as he replied
to it.
Passed without regressions.
Johann
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2011-07-29 9:56 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-27 16:29 [Patch,AVR]: PR49313 Georg-Johann Lay
2011-07-27 16:48 ` Richard Henderson
2011-07-29 11:03 ` Georg-Johann Lay
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).