* [PATCH: RL78] Optimize libgcc routines using clrw and clrb
@ 2016-04-05 8:08 Kaushik Phatak
2016-04-06 18:13 ` DJ Delorie
0 siblings, 1 reply; 3+ messages in thread
From: Kaushik Phatak @ 2016-04-05 8:08 UTC (permalink / raw)
To: 'gcc-patches@gcc.gnu.org'; +Cc: DJ Delorie
[-- Attachment #1: Type: text/plain, Size: 5751 bytes --]
Hi,
Please find below a patch that optimizes libgcc routines for the RL78 target.
This is similar to my earlier patch submitted here,
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00415.html
The patch optimizes the loading of an immediate value of 0x00 by using the clrw or clrb instruction.
It replaces the movw/mov instructions with the smaller clrw/clrb instructions.
The clrw and clrb instructions generate only 1 byte of opcode, compared to 3 or 2 bytes for movw and mov respectively.
Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.
Best Regards,
Kaushik
p.s. Kindly ignore any disclaimers at end of this e-mail as they are auto-inserted.
Apologies for the same.
2016-04-06 Kaushik Phatak <kaushik.phatak@kpit.com>
* config/rl78/bit-count.S: Use clrw/clrb where possible.
* config/rl78/cmpsi2.S: Likewise.
* config/rl78/divmodhi.S: Likewise.
* config/rl78/divmodsi.S: Likewise.
* config/rl78/fpbit-sf.S: Likewise.
* config/rl78/fpmath-sf.S: Likewise.
* config/rl78/mulsi3.S: Likewise.
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S (revision 3174)
+++ libgcc/config/rl78/bit-count.S (working copy)
@@ -139,7 +139,7 @@
xor1 cy, a.5
xor1 cy, a.6
xor1 cy, a.7
- movw ax, #0
+ clrw ax
bnc $1f
incw ax
1:
@@ -190,7 +190,7 @@
movw ax, sp
addw ax, #4
movw hl, ax
- mov a, #0
+ clrb a
1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
mov x, a
- mov a, #0
+ clrb a
movw r8, ax
ret
END_FUNC ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
;; They differ. Subtract *S2 from *S1 and return as the result.
mov x, a
- mov a, #0
- mov r9, #0
+ clrb a
+ clrb r9
subw ax, r8
1:
movw r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S (revision 3174)
+++ libgcc/config/rl78/divmodhi.S (working copy)
@@ -576,7 +576,7 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
.endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S (revision 3174)
+++ libgcc/config/rl78/divmodsi.S (working copy)
@@ -952,10 +952,10 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S (working copy)
@@ -117,7 +117,7 @@
call $!__int_iszero
bnz $2f
;; At this point, both args are zero.
- mov a, #0
+ clrb a
ret
2:
@@ -151,7 +151,7 @@
bc $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
- mov a, #0
+ clrb a
ret
xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movw r10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrb r8
mov r10, #0x8000
ret
@@ -302,10 +302,10 @@
clr1 a.7
call $!__int_fixunssfsi
- movw ax, #0
+ clrw ax
subw ax, r8
movw r8, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, r10
@@ -410,7 +410,7 @@
set1 a.7
;; Clear B:C:R12:R13
- movw bc, #0
+ clrw bc
movw r12, #0
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
;; If negative convert to positive ...
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, bc
movw bc, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, hl
@@ -533,7 +533,7 @@
bnz $1f
movw ax, bc
cmpw ax, #0
- movw ax, #0
+ clrw ax
bnz $1f
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S (working copy)
@@ -87,7 +87,7 @@
or a, #0x80
mov A_FRAC_H, a
- mov a, #0
+ clrb a
mov A_FRAC_HH, a
;; rounding-bit-shift
@@ -273,7 +273,7 @@
;; "zero out" b
movw ax, A_EXP
movw B_EXP, ax
- movw ax, #0
+ clrw ax
movw B_FRAC_L, ax
movw B_FRAC_H, ax
br $5f
@@ -281,7 +281,7 @@
;; "zero out" a
movw ax, B_EXP
movw A_EXP, ax
- movw ax, #0
+ clrw ax
movw A_FRAC_L, ax
movw A_FRAC_H, ax
@@ -379,7 +379,7 @@
bt a.7, $.L706
;; subtraction was positive
- mov a, #0
+ clrb a
mov A_SIGN, a
br $.L712
@@ -543,7 +543,7 @@
or a, A_FRAC_H
or a, A_FRAC_HH
bnz $1f
- movw ax, #0
+ clrw ax
movw A_EXP, ax
1:
mov a, A_FRAC_H
@@ -682,7 +682,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+4], ax
movw [sp+6], ax
movw [sp+12], ax
@@ -867,7 +867,7 @@
and a, #0x80
mov r11, a
movw r8, #0
- mov r10, #0
+ clrb r10
ret
1:
@@ -930,7 +930,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+0], ax
movw [sp+2], ax
movw [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
movw ax, bc
.Lmul_hisi_top:
- movw bc, #0
+ clrw bc
.Lmul_hisi_loop:
shrw ax, 1
[-- Attachment #2: rl78_libgcc_optimize_clrw.patch --]
[-- Type: application/octet-stream, Size: 4440 bytes --]
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S (revision 3174)
+++ libgcc/config/rl78/bit-count.S (working copy)
@@ -139,7 +139,7 @@
xor1 cy, a.5
xor1 cy, a.6
xor1 cy, a.7
- movw ax, #0
+ clrw ax
bnc $1f
incw ax
1:
@@ -190,7 +190,7 @@
movw ax, sp
addw ax, #4
movw hl, ax
- mov a, #0
+ clrb a
1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
mov x, a
- mov a, #0
+ clrb a
movw r8, ax
ret
END_FUNC ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
;; They differ. Subtract *S2 from *S1 and return as the result.
mov x, a
- mov a, #0
- mov r9, #0
+ clrb a
+ clrb r9
subw ax, r8
1:
movw r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S (revision 3174)
+++ libgcc/config/rl78/divmodhi.S (working copy)
@@ -576,7 +576,7 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
.endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S (revision 3174)
+++ libgcc/config/rl78/divmodsi.S (working copy)
@@ -952,10 +952,10 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S (working copy)
@@ -117,7 +117,7 @@
call $!__int_iszero
bnz $2f
;; At this point, both args are zero.
- mov a, #0
+ clrb a
ret
2:
@@ -151,7 +151,7 @@
bc $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
- mov a, #0
+ clrb a
ret
xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movw r10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrb r8
mov r10, #0x8000
ret
@@ -302,10 +302,10 @@
clr1 a.7
call $!__int_fixunssfsi
- movw ax, #0
+ clrw ax
subw ax, r8
movw r8, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, r10
@@ -410,7 +410,7 @@
set1 a.7
;; Clear B:C:R12:R13
- movw bc, #0
+ clrw bc
movw r12, #0
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
;; If negative convert to positive ...
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, bc
movw bc, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, hl
@@ -533,7 +533,7 @@
bnz $1f
movw ax, bc
cmpw ax, #0
- movw ax, #0
+ clrw ax
bnz $1f
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S (working copy)
@@ -87,7 +87,7 @@
or a, #0x80
mov A_FRAC_H, a
- mov a, #0
+ clrb a
mov A_FRAC_HH, a
;; rounding-bit-shift
@@ -273,7 +273,7 @@
;; "zero out" b
movw ax, A_EXP
movw B_EXP, ax
- movw ax, #0
+ clrw ax
movw B_FRAC_L, ax
movw B_FRAC_H, ax
br $5f
@@ -281,7 +281,7 @@
;; "zero out" a
movw ax, B_EXP
movw A_EXP, ax
- movw ax, #0
+ clrw ax
movw A_FRAC_L, ax
movw A_FRAC_H, ax
@@ -379,7 +379,7 @@
bt a.7, $.L706
;; subtraction was positive
- mov a, #0
+ clrb a
mov A_SIGN, a
br $.L712
@@ -543,7 +543,7 @@
or a, A_FRAC_H
or a, A_FRAC_HH
bnz $1f
- movw ax, #0
+ clrw ax
movw A_EXP, ax
1:
mov a, A_FRAC_H
@@ -682,7 +682,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+4], ax
movw [sp+6], ax
movw [sp+12], ax
@@ -867,7 +867,7 @@
and a, #0x80
mov r11, a
movw r8, #0
- mov r10, #0
+ clrb r10
ret
1:
@@ -930,7 +930,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+0], ax
movw [sp+2], ax
movw [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
movw ax, bc
.Lmul_hisi_top:
- movw bc, #0
+ clrw bc
.Lmul_hisi_loop:
shrw ax, 1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH: RL78] Optimize libgcc routines using clrw and clrb
2016-04-05 8:08 [PATCH: RL78] Optimize libgcc routines using clrw and clrb Kaushik Phatak
@ 2016-04-06 18:13 ` DJ Delorie
0 siblings, 0 replies; 3+ messages in thread
From: DJ Delorie @ 2016-04-06 18:13 UTC (permalink / raw)
To: Kaushik Phatak; +Cc: gcc-patches
Kaushik Phatak <Kaushik.Phatak@kpit.com> writes:
> 2016-04-06 Kaushik Phatak <kaushik.phatak@kpit.com>
>
> * config/rl78/bit-count.S: Use clrw/clrb where possible.
> * config/rl78/cmpsi2.S: Likewise.
> * config/rl78/divmodhi.S Likewise.
> * config/rl78/divmodsi.S Likewise.
> * config/rl78/fpbit-sf.S Likewise.
> * config/rl78/fpmath-sf.S Likewise.
> * config/rl78/mulsi3.S Likewise.
This patch is fine, please apply once gcc is in stage 1 again.
Thanks!
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH: RL78] Optimize libgcc routines using clrw and clrb
@ 2016-02-05 12:57 Kaushik Phatak
0 siblings, 0 replies; 3+ messages in thread
From: Kaushik Phatak @ 2016-02-05 12:57 UTC (permalink / raw)
To: 'gcc-patches@gcc.gnu.org'; +Cc: nick clifton (nickc@redhat.com)
[-- Attachment #1: Type: text/plain, Size: 6998 bytes --]
Hi,
Please find below a simple patch which optimizes the loading of an immediate value by using the clrw or clrb
instruction when 0x00 is being loaded into a register.
The patch replaces the movw/mov instructions with the smaller clrw/clrb instructions.
The clrw and clrb instructions generate only 1 byte of opcode, compared to 3 or 2 bytes for movw and mov respectively.
In total, this patch improves code size by about 94 bytes in these libgcc routines.
The following routines have improved code size,
___mulsi3 : 2 bytes
___divsi3 : 20 bytes
___modsi3 : 20 bytes
___divhi3 : 10 bytes
___modhi3 : 10 bytes
___parityqi_internal : 2 bytes
__int_cmpsf : 2 bytes
___fixsfsi : 5 bytes
___fixunssfsi : 2 bytes
___floatsisf : 6 bytes
_int_unpack_sf : 1 byte
___addsf3 : 5 bytes
__rl78_int_pack_a_r8 : 2 bytes
___mulsf3 : 2 bytes
___divsf3 : 3 bytes
__gcc_bcmp : 2 bytes
I have also attached a draft version of a similar patch (rl78_libgcc_optimize_draft.patch), which goes further and
removes movw immediate loads to other saddr registers, replacing each of them with 2 instructions, e.g.
START_FUNC ___modhi3
;; r8 = 4[sp] % 6[sp]
- movw de, #0
+ clrw ax
+ movw de,ax
mov a, [sp+5]
This patch improves code size by 1 byte for each such substitution, but it does add an extra clock cycle.
We may consider this patch if we are purely looking for code size improvement, assuming the libraries
are built with -Os. It shows a total improvement of 134 bytes in code size.
Patch1: rl78_libgcc_optimize_clrw.patch - 94 bytes improvement in code size.
Patch2: rl78_libgcc_optimize_draft.patch - 134 bytes improvement in code size.
Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.
Best Regards,
Kaushik
p.s. Kindly ignore any disclaimers at end of this e-mail as they are auto-inserted.
Apologies for the same.
2016-02-05 Kaushik Phatak <kaushik.phatak@kpit.com>
* config/rl78/bit-count.S: Use clrw/clrb where possible.
* config/rl78/cmpsi2.S: Likewise.
* config/rl78/divmodhi.S: Likewise.
* config/rl78/divmodsi.S: Likewise.
* config/rl78/fpbit-sf.S: Likewise.
* config/rl78/fpmath-sf.S: Likewise.
* config/rl78/mulsi3.S: Likewise.
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S (revision 3174)
+++ libgcc/config/rl78/bit-count.S (working copy)
@@ -139,7 +139,7 @@
xor1 cy, a.5
xor1 cy, a.6
xor1 cy, a.7
- movw ax, #0
+ clrw ax
bnc $1f
incw ax
1:
@@ -190,7 +190,7 @@
movw ax, sp
addw ax, #4
movw hl, ax
- mov a, #0
+ clrb a
1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
mov x, a
- mov a, #0
+ clrb a
movw r8, ax
ret
END_FUNC ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
;; They differ. Subtract *S2 from *S1 and return as the result.
mov x, a
- mov a, #0
- mov r9, #0
+ clrb a
+ clrb r9
subw ax, r8
1:
movw r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S (revision 3174)
+++ libgcc/config/rl78/divmodhi.S (working copy)
@@ -576,7 +576,7 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
.endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S (revision 3174)
+++ libgcc/config/rl78/divmodsi.S (working copy)
@@ -952,10 +952,10 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S (working copy)
@@ -117,7 +117,7 @@
call $!__int_iszero
bnz $2f
;; At this point, both args are zero.
- mov a, #0
+ clrb a
ret
2:
@@ -151,7 +151,7 @@
bc $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
- mov a, #0
+ clrb a
ret
xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movw r10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrb r8
mov r10, #0x8000
ret
@@ -302,10 +302,10 @@
clr1 a.7
call $!__int_fixunssfsi
- movw ax, #0
+ clrw ax
subw ax, r8
movw r8, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, r10
@@ -410,7 +410,7 @@
set1 a.7
;; Clear B:C:R12:R13
- movw bc, #0
+ clrw bc
movw r12, #0
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
;; If negative convert to positive ...
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, bc
movw bc, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, hl
@@ -533,7 +533,7 @@
bnz $1f
movw ax, bc
cmpw ax, #0
- movw ax, #0
+ clrw ax
bnz $1f
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S (working copy)
@@ -87,7 +87,7 @@
or a, #0x80
mov A_FRAC_H, a
- mov a, #0
+ clrb a
mov A_FRAC_HH, a
;; rounding-bit-shift
@@ -273,7 +273,7 @@
;; "zero out" b
movw ax, A_EXP
movw B_EXP, ax
- movw ax, #0
+ clrw ax
movw B_FRAC_L, ax
movw B_FRAC_H, ax
br $5f
@@ -281,7 +281,7 @@
;; "zero out" a
movw ax, B_EXP
movw A_EXP, ax
- movw ax, #0
+ clrw ax
movw A_FRAC_L, ax
movw A_FRAC_H, ax
@@ -379,7 +379,7 @@
bt a.7, $.L706
;; subtraction was positive
- mov a, #0
+ clrb a
mov A_SIGN, a
br $.L712
@@ -543,7 +543,7 @@
or a, A_FRAC_H
or a, A_FRAC_HH
bnz $1f
- movw ax, #0
+ clrw ax
movw A_EXP, ax
1:
mov a, A_FRAC_H
@@ -682,7 +682,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+4], ax
movw [sp+6], ax
movw [sp+12], ax
@@ -867,7 +867,7 @@
and a, #0x80
mov r11, a
movw r8, #0
- mov r10, #0
+ clrb r10
ret
1:
@@ -930,7 +930,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+0], ax
movw [sp+2], ax
movw [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
movw ax, bc
.Lmul_hisi_top:
- movw bc, #0
+ clrw bc
.Lmul_hisi_loop:
shrw ax, 1
[-- Attachment #2: rl78_libgcc_optimize_draft.patch --]
[-- Type: application/octet-stream, Size: 8371 bytes --]
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S (revision 3174)
+++ libgcc/config/rl78/bit-count.S (working copy)
@@ -139,7 +139,7 @@
xor1 cy, a.5
xor1 cy, a.6
xor1 cy, a.7
- movw ax, #0
+ clrw ax
bnc $1f
incw ax
1:
@@ -190,7 +190,7 @@
movw ax, sp
addw ax, #4
movw hl, ax
- mov a, #0
+ clrb a
1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
mov x, a
- mov a, #0
+ clrb a
movw r8, ax
ret
END_FUNC ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -131,7 +131,8 @@
;; SIZE is at [sp+8]
;; Result in r8/r9
- movw r10, #0
+ clrw ax
+ movw r10,ax
1:
;; Compare R10 against the SIZE parameter
movw ax, [sp+8]
@@ -162,8 +163,8 @@
;; They differ. Subtract *S2 from *S1 and return as the result.
mov x, a
- mov a, #0
- mov r9, #0
+ clrb a
+ clrb r9
subw ax, r8
1:
movw r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S (revision 3174)
+++ libgcc/config/rl78/divmodhi.S (working copy)
@@ -413,7 +413,8 @@
num_lt_den\which:
.if \need_result
- movw r8, #0
+ clrw ax
+ movw r8,ax
.else
movw ax, [sp+8]
movw r8, ax
@@ -455,9 +456,11 @@
cmpw ax, #0
bnz $den_not_zero\which
.if \need_result
- movw quot, #0
+ clrw ax
+ movw quot,ax
.else
- movw num, #0
+ clrw ax
+ movw num,ax
.endif
ret
@@ -464,7 +467,8 @@
den_not_zero\which:
.if \need_result
;; zero out quot
- movw quot, #0
+ clrw ax
+ movw quot,ax
.endif
;; initialize bit to 1
@@ -576,7 +580,7 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
.endm
@@ -585,7 +589,8 @@
START_FUNC ___divhi3
;; r8 = 4[sp] / 6[sp]
- movw de, #0
+ clrw ax
+ movw de,ax
mov a, [sp+5]
mov1 cy, a.7
bc $div_signed_num
@@ -640,7 +645,8 @@
START_FUNC ___modhi3
;; r8 = 4[sp] % 6[sp]
- movw de, #0
+ clrw ax
+ movw de,ax
mov a, [sp+5]
mov1 cy, a.7
bc $mod_signed_num
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S (revision 3174)
+++ libgcc/config/rl78/divmodsi.S (working copy)
@@ -604,8 +604,9 @@
num_lt_den\which:
.if \need_result
- movw r8, #0
- movw r10, #0
+ clrw ax
+ movw r8, ax
+ movw r10, ax
.else
movw ax, [sp+8]
movw r8, ax
@@ -617,11 +618,13 @@
shift_den_bit16\which:
movw ax, denL
movw denH, ax
- movw denL, #0
+ clrw ax
+ movw denL, ax
.if \need_result
movw ax, bitL
movw bitH, ax
- movw bitL, #0
+ clrw ax
+ movw bitL, ax
.else
mov a, bit
add a, #16
@@ -687,13 +690,14 @@
or a, denB2
or a, denB3 ; not x
cmpw ax, #0
+ clrw ax
bnz $den_not_zero\which
.if \need_result
- movw quotL, #0
- movw quotH, #0
+ movw quotL, ax
+ movw quotH, ax
.else
- movw numL, #0
- movw numH, #0
+ movw numL, ax
+ movw numH, ax
.endif
br $!main_loop_done_himode\which
@@ -700,8 +704,8 @@
den_not_zero\which:
.if \need_result
;; zero out quot
- movw quotL, #0
- movw quotH, #0
+ movw quotL, ax
+ movw quotH, ax
.endif
;; initialize bit to 1
@@ -952,10 +956,10 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, [hl+2]
@@ -966,7 +970,8 @@
START_FUNC ___divsi3
;; r8 = 4[sp] / 8[sp]
- movw de, #0
+ clrw ax
+ movw de, ax
mov a, [sp+7]
mov1 cy, a.7
bc $div_signed_num
@@ -1022,7 +1027,8 @@
START_FUNC ___modsi3
;; r8 = 4[sp] % 8[sp]
- movw de, #0
+ clrw ax
+ movw de, ax
mov a, [sp+7]
mov1 cy, a.7
bc $mod_signed_num
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S (working copy)
@@ -117,7 +117,7 @@
call $!__int_iszero
bnz $2f
;; At this point, both args are zero.
- mov a, #0
+ clrb a
ret
2:
@@ -151,7 +151,7 @@
bc $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
- mov a, #0
+ clrb a
ret
xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movw r10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrb r8
mov r10, #0x8000
ret
@@ -302,10 +302,10 @@
clr1 a.7
call $!__int_fixunssfsi
- movw ax, #0
+ clrw ax
subw ax, r8
movw r8, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, r10
@@ -355,8 +355,9 @@
bnc $1f
;; Return zero.
-2: movw r8, #0
- movw r10, #0
+2: clrw ax
+ movw r8, ax
+ movw r10, ax
ret
;; An exponent of -1 is either a NaN or infinity.
@@ -410,7 +411,7 @@
set1 a.7
;; Clear B:C:R12:R13
- movw bc, #0
+ clrw bc
movw r12, #0
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +483,10 @@
;; If negative convert to positive ...
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, bc
movw bc, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, hl
@@ -533,7 +534,7 @@
bnz $1f
movw ax, bc
cmpw ax, #0
- movw ax, #0
+ clrw ax
bnz $1f
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S (working copy)
@@ -87,7 +87,7 @@
or a, #0x80
mov A_FRAC_H, a
- mov a, #0
+ clrb a
mov A_FRAC_HH, a
;; rounding-bit-shift
@@ -273,7 +273,7 @@
;; "zero out" b
movw ax, A_EXP
movw B_EXP, ax
- movw ax, #0
+ clrw ax
movw B_FRAC_L, ax
movw B_FRAC_H, ax
br $5f
@@ -281,7 +281,7 @@
;; "zero out" a
movw ax, B_EXP
movw A_EXP, ax
- movw ax, #0
+ clrw ax
movw A_FRAC_L, ax
movw A_FRAC_H, ax
@@ -379,7 +379,7 @@
bt a.7, $.L706
;; subtraction was positive
- mov a, #0
+ clrb a
mov A_SIGN, a
br $.L712
@@ -490,7 +490,8 @@
or a, #0x7f
mov x, #0x80
movw r10, ax
- movw r8, #0
+ clrw ax
+ movw r8, ax
ret
1:
@@ -543,7 +544,7 @@
or a, A_FRAC_H
or a, A_FRAC_HH
bnz $1f
- movw ax, #0
+ clrw ax
movw A_EXP, ax
1:
mov a, A_FRAC_H
@@ -682,7 +683,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+4], ax
movw [sp+6], ax
movw [sp+12], ax
@@ -866,8 +867,9 @@
xor a, b
and a, #0x80
mov r11, a
- movw r8, #0
- mov r10, #0
+ clrw ax
+ movw r8, ax
+ clrb r10
ret
1:
@@ -885,7 +887,8 @@
xor a, b
or a, #0x7f
mov r11, a
- movw r8, #0
+ clrw ax
+ movw r8, ax
mov r10, #0x80
ret
1:
@@ -930,7 +933,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+0], ax
movw [sp+2], ax
movw [sp+12], ax
Index: libgcc/config/rl78/lshrsi3.S
===================================================================
--- libgcc/config/rl78/lshrsi3.S (revision 3174)
+++ libgcc/config/rl78/lshrsi3.S (working copy)
@@ -46,8 +46,9 @@
bc $.Lcount_is_normal
;; count is out of bounds, just return zero.
- movw r8, #0
- movw r10, #0
+ clrw ax
+ movw r8, ax
+ movw r10, ax
ret
.Lcount_is_normal:
@@ -66,7 +67,8 @@
bf a.4, $.Lcount_lt_16
;; count >= 16, shift 16 at a time.
- movw r10, #0
+ clrw ax
+ movw r10, ax
movw ax, [sp+6]
movw r8, ax
mov a, b
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
movw ax, bc
.Lmul_hisi_top:
- movw bc, #0
+ clrw bc
.Lmul_hisi_loop:
shrw ax, 1
@@ -242,7 +242,8 @@
;----------------------------------------------------------------------
START_FUNC ___mulhi3
- movw r8, #0
+ clrw ax
+ movw r8, ax
movw ax, [sp+6]
movw bc, ax
movw ax, [sp+4]
Index: libgcc/config/rl78/signbit.S
===================================================================
--- libgcc/config/rl78/signbit.S (revision 3174)
+++ libgcc/config/rl78/signbit.S (working copy)
@@ -42,7 +42,8 @@
;; X is at [sp+4]..[SP+7]
;; result is in R8..R9
- movw r8, #0
+ clrw ax
+ movw r8, ax
mov a, [sp+7]
mov1 cy, a.7
sknc
@@ -56,7 +57,8 @@
;; X is at [sp+4]..[SP+7]
;; result is in R8..R9
- movw r8, #0
+ clrw ax
+ movw r8, ax
mov a, [sp+11]
mov1 cy, a.7
sknc
[-- Attachment #3: rl78_libgcc_optimize_clrw.patch --]
[-- Type: application/octet-stream, Size: 4440 bytes --]
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S (revision 3174)
+++ libgcc/config/rl78/bit-count.S (working copy)
@@ -139,7 +139,7 @@
xor1 cy, a.5
xor1 cy, a.6
xor1 cy, a.7
- movw ax, #0
+ clrw ax
bnc $1f
incw ax
1:
@@ -190,7 +190,7 @@
movw ax, sp
addw ax, #4
movw hl, ax
- mov a, #0
+ clrb a
1:
xch a, b
mov a, [hl]
@@ -207,7 +207,7 @@
bnz $1b
mov x, a
- mov a, #0
+ clrb a
movw r8, ax
ret
END_FUNC ___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S (revision 3174)
+++ libgcc/config/rl78/cmpsi2.S (working copy)
@@ -162,8 +162,8 @@
;; They differ. Subtract *S2 from *S1 and return as the result.
mov x, a
- mov a, #0
- mov r9, #0
+ clrb a
+ clrb r9
subw ax, r8
1:
movw r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S (revision 3174)
+++ libgcc/config/rl78/divmodhi.S (working copy)
@@ -576,7 +576,7 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
.endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S (revision 3174)
+++ libgcc/config/rl78/divmodsi.S (working copy)
@@ -952,10 +952,10 @@
.macro NEG_AX
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, [hl]
movw [hl], ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S (revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S (working copy)
@@ -117,7 +117,7 @@
call $!__int_iszero
bnz $2f
;; At this point, both args are zero.
- mov a, #0
+ clrb a
ret
2:
@@ -151,7 +151,7 @@
bc $ybig_cmpsf ; branch if X < Y
bnz $xbig_cmpsf ; branch if X > Y
- mov a, #0
+ clrb a
ret
xbig_cmpsf: ; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
movw r10, #0x7fff
ret
;; -inf
-2: mov r8, #0
+2: clrb r8
mov r10, #0x8000
ret
@@ -302,10 +302,10 @@
clr1 a.7
call $!__int_fixunssfsi
- movw ax, #0
+ clrw ax
subw ax, r8
movw r8, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, r10
@@ -410,7 +410,7 @@
set1 a.7
;; Clear B:C:R12:R13
- movw bc, #0
+ clrw bc
movw r12, #0
;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
;; If negative convert to positive ...
movw hl, ax
- movw ax, #0
+ clrw ax
subw ax, bc
movw bc, ax
- movw ax, #0
+ clrw ax
sknc
decw ax
subw ax, hl
@@ -533,7 +533,7 @@
bnz $1f
movw ax, bc
cmpw ax, #0
- movw ax, #0
+ clrw ax
bnz $1f
;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S (revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S (working copy)
@@ -87,7 +87,7 @@
or a, #0x80
mov A_FRAC_H, a
- mov a, #0
+ clrb a
mov A_FRAC_HH, a
;; rounding-bit-shift
@@ -273,7 +273,7 @@
;; "zero out" b
movw ax, A_EXP
movw B_EXP, ax
- movw ax, #0
+ clrw ax
movw B_FRAC_L, ax
movw B_FRAC_H, ax
br $5f
@@ -281,7 +281,7 @@
;; "zero out" a
movw ax, B_EXP
movw A_EXP, ax
- movw ax, #0
+ clrw ax
movw A_FRAC_L, ax
movw A_FRAC_H, ax
@@ -379,7 +379,7 @@
bt a.7, $.L706
;; subtraction was positive
- mov a, #0
+ clrb a
mov A_SIGN, a
br $.L712
@@ -543,7 +543,7 @@
or a, A_FRAC_H
or a, A_FRAC_HH
bnz $1f
- movw ax, #0
+ clrw ax
movw A_EXP, ax
1:
mov a, A_FRAC_H
@@ -682,7 +682,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+4], ax
movw [sp+6], ax
movw [sp+12], ax
@@ -867,7 +867,7 @@
and a, #0x80
mov r11, a
movw r8, #0
- mov r10, #0
+ clrb r10
ret
1:
@@ -930,7 +930,7 @@
movw ax, B_FRAC_H
movw [sp+10], ax
- movw ax, #0
+ clrw ax
movw [sp+0], ax
movw [sp+2], ax
movw [sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S (revision 3174)
+++ libgcc/config/rl78/mulsi3.S (working copy)
@@ -148,7 +148,7 @@
movw ax, bc
.Lmul_hisi_top:
- movw bc, #0
+ clrw bc
.Lmul_hisi_loop:
shrw ax, 1
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2016-04-06 18:13 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-05 8:08 [PATCH: RL78] Optimize libgcc routines using clrw and clrb Kaushik Phatak
2016-04-06 18:13 ` DJ Delorie
-- strict thread matches above, loose matches on Subject: below --
2016-02-05 12:57 Kaushik Phatak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).