[PATCH: RL78] Optimize libgcc routines using clrw and clrb

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH: RL78] Optimize libgcc routines using clrw and clrb
@ 2016-04-05  8:08 Kaushik Phatak
  2016-04-06 18:13 ` DJ Delorie
  0 siblings, 1 reply; 3+ messages in thread
From: Kaushik Phatak @ 2016-04-05  8:08 UTC (permalink / raw)
  To: 'gcc-patches@gcc.gnu.org'; +Cc: DJ Delorie

[-- Attachment #1: Type: text/plain, Size: 5751 bytes --]

Hi,
Please find below a patch that optimizes libgcc routines for the RL78 target.

This is similar to my earlier patch submitted here,
https://gcc.gnu.org/ml/gcc-patches/2016-02/msg00415.html

The patch optimizes the loading of an immediate value of 0x00 by using the clrw or clrb instruction.
It replaces the movw/mov instructions with the smaller clrw/clrb instructions.
The clrw and clrb instructions generate only 1 byte of opcode, compared to 3 and 2 bytes for movw and mov respectively.

Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.

Best Regards,
Kaushik

p.s. Kindly ignore any disclaimers at end of this e-mail as they are auto-inserted.
Apologies for the same.

2016-04-06  Kaushik Phatak <kaushik.phatak@kpit.com>

	* config/rl78/bit-count.S: Use clrw/clrb where possible.
	* config/rl78/cmpsi2.S: Likewise.
	* config/rl78/divmodhi.S: Likewise.
	* config/rl78/divmodsi.S: Likewise.
	* config/rl78/fpbit-sf.S: Likewise.
	* config/rl78/fpmath-sf.S: Likewise.
	* config/rl78/mulsi3.S: Likewise.

Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S	(revision 3174)
+++ libgcc/config/rl78/bit-count.S	(working copy)
@@ -139,7 +139,7 @@
 	xor1	cy, a.5
 	xor1	cy, a.6
 	xor1	cy, a.7
-	movw	ax, #0
+	clrw	ax
 	bnc	$1f
 	incw	ax
 1:
@@ -190,7 +190,7 @@
 	movw	ax, sp
 	addw	ax, #4
 	movw	hl, ax
-	mov	a, #0
+	clrb	a
 1:
 	xch	a, b
 	mov	a, [hl]
@@ -207,7 +207,7 @@
 	bnz	$1b
 
 	mov	x, a
-	mov	a, #0
+	clrb	a
 	movw	r8, ax
 	ret	
 END_FUNC	___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S	(revision 3174)
+++ libgcc/config/rl78/cmpsi2.S	(working copy)
@@ -162,8 +162,8 @@
 
 	;; They differ.  Subtract *S2 from *S1 and return as the result.
 	mov	x, a
-	mov	a, #0
-	mov	r9, #0
+	clrb	a
+	clrb	r9
 	subw	ax, r8
 1:
 	movw	r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 3174)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 3174)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S	(working copy)
@@ -117,7 +117,7 @@
 	call	$!__int_iszero
 	bnz	$2f
 	;; At this point, both args are zero.
-	mov	a, #0
+	clrb	a
 	ret
 
 2:
@@ -151,7 +151,7 @@
 	bc	$ybig_cmpsf	; branch if X < Y
 	bnz	$xbig_cmpsf	; branch if X > Y
 
-	mov	a, #0
+	clrb	a
 	ret
 
 xbig_cmpsf:			; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
 	movw	r10, #0x7fff
 	ret
 	;; -inf
-2:	mov	r8, #0
+2:	clrb	r8
 	mov	r10, #0x8000
 	ret
 	
@@ -302,10 +302,10 @@
 	clr1	a.7
 	call	$!__int_fixunssfsi
 
-	movw	ax, #0
+	clrw	ax
 	subw	ax, r8
 	movw	r8, ax
-	movw	ax, #0
+	clrw	ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
 	set1	a.7
 
 	;; Clear B:C:R12:R13
-	movw	bc, #0
+	clrw	bc
 	movw	r12, #0
 
 	;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
 	;; If negative convert to positive ...
 	movw 	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, bc
 	movw	bc, ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, hl
@@ -533,7 +533,7 @@
 	bnz	$1f
 	movw	ax, bc
 	cmpw	ax, #0
-	movw	ax, #0
+	clrw	ax
 	bnz	$1f
 
 	;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S	(working copy)
@@ -87,7 +87,7 @@
 	or	a, #0x80
 	mov	A_FRAC_H, a
 
-	mov	a, #0
+	clrb	a
 	mov	A_FRAC_HH, a
 
 	;; rounding-bit-shift
@@ -273,7 +273,7 @@
 	;; "zero out" b
 	movw	ax, A_EXP
 	movw	B_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	B_FRAC_L, ax
 	movw	B_FRAC_H, ax
 	br	$5f
@@ -281,7 +281,7 @@
 	;; "zero out" a
 	movw	ax, B_EXP
 	movw	A_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	A_FRAC_L, ax
 	movw	A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
 	bt	a.7, $.L706
 	
 	;; subtraction was positive
-	mov	a, #0
+	clrb	a
 	mov	A_SIGN, a
 	br	$.L712
 
@@ -543,7 +543,7 @@
 	or	a, A_FRAC_H
 	or	a, A_FRAC_HH
 	bnz	$1f
-	movw	ax, #0
+	clrw	ax
 	movw	A_EXP, ax
 1:	
 	mov	a, A_FRAC_H
@@ -682,7 +682,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+4], ax
 	movw	[sp+6], ax
 	movw	[sp+12], ax
@@ -867,7 +867,7 @@
 	and	a, #0x80
 	mov	r11, a
 	movw	r8, #0
-	mov	r10, #0
+	clrb	r10
 	ret
 	
 1:	
@@ -930,7 +930,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+0], ax
 	movw	[sp+2], ax
 	movw	[sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S	(revision 3174)
+++ libgcc/config/rl78/mulsi3.S	(working copy)
@@ -148,7 +148,7 @@
 	movw	ax, bc
 
 .Lmul_hisi_top:
-	movw	bc, #0
+	clrw	bc
 
 .Lmul_hisi_loop:
 	shrw	ax, 1


[-- Attachment #2: rl78_libgcc_optimize_clrw.patch --]
[-- Type: application/octet-stream, Size: 4440 bytes --]

Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S	(revision 3174)
+++ libgcc/config/rl78/bit-count.S	(working copy)
@@ -139,7 +139,7 @@
 	xor1	cy, a.5
 	xor1	cy, a.6
 	xor1	cy, a.7
-	movw	ax, #0
+	clrw	ax
 	bnc	$1f
 	incw	ax
 1:
@@ -190,7 +190,7 @@
 	movw	ax, sp
 	addw	ax, #4
 	movw	hl, ax
-	mov	a, #0
+	clrb	a
 1:
 	xch	a, b
 	mov	a, [hl]
@@ -207,7 +207,7 @@
 	bnz	$1b
 
 	mov	x, a
-	mov	a, #0
+	clrb	a
 	movw	r8, ax
 	ret	
 END_FUNC	___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S	(revision 3174)
+++ libgcc/config/rl78/cmpsi2.S	(working copy)
@@ -162,8 +162,8 @@
 
 	;; They differ.  Subtract *S2 from *S1 and return as the result.
 	mov	x, a
-	mov	a, #0
-	mov	r9, #0
+	clrb	a
+	clrb	r9
 	subw	ax, r8
 1:
 	movw	r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 3174)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 3174)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S	(working copy)
@@ -117,7 +117,7 @@
 	call	$!__int_iszero
 	bnz	$2f
 	;; At this point, both args are zero.
-	mov	a, #0
+	clrb	a
 	ret
 
 2:
@@ -151,7 +151,7 @@
 	bc	$ybig_cmpsf	; branch if X < Y
 	bnz	$xbig_cmpsf	; branch if X > Y
 
-	mov	a, #0
+	clrb	a
 	ret
 
 xbig_cmpsf:			; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
 	movw	r10, #0x7fff
 	ret
 	;; -inf
-2:	mov	r8, #0
+2:	clrb	r8
 	mov	r10, #0x8000
 	ret
 	
@@ -302,10 +302,10 @@
 	clr1	a.7
 	call	$!__int_fixunssfsi
 
-	movw	ax, #0
+	clrw	ax
 	subw	ax, r8
 	movw	r8, ax
-	movw	ax, #0
+	clrw	ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
 	set1	a.7
 
 	;; Clear B:C:R12:R13
-	movw	bc, #0
+	clrw	bc
 	movw	r12, #0
 
 	;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
 	;; If negative convert to positive ...
 	movw 	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, bc
 	movw	bc, ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, hl
@@ -533,7 +533,7 @@
 	bnz	$1f
 	movw	ax, bc
 	cmpw	ax, #0
-	movw	ax, #0
+	clrw	ax
 	bnz	$1f
 
 	;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S	(working copy)
@@ -87,7 +87,7 @@
 	or	a, #0x80
 	mov	A_FRAC_H, a
 
-	mov	a, #0
+	clrb	a
 	mov	A_FRAC_HH, a
 
 	;; rounding-bit-shift
@@ -273,7 +273,7 @@
 	;; "zero out" b
 	movw	ax, A_EXP
 	movw	B_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	B_FRAC_L, ax
 	movw	B_FRAC_H, ax
 	br	$5f
@@ -281,7 +281,7 @@
 	;; "zero out" a
 	movw	ax, B_EXP
 	movw	A_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	A_FRAC_L, ax
 	movw	A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
 	bt	a.7, $.L706
 	
 	;; subtraction was positive
-	mov	a, #0
+	clrb	a
 	mov	A_SIGN, a
 	br	$.L712
 
@@ -543,7 +543,7 @@
 	or	a, A_FRAC_H
 	or	a, A_FRAC_HH
 	bnz	$1f
-	movw	ax, #0
+	clrw	ax
 	movw	A_EXP, ax
 1:	
 	mov	a, A_FRAC_H
@@ -682,7 +682,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+4], ax
 	movw	[sp+6], ax
 	movw	[sp+12], ax
@@ -867,7 +867,7 @@
 	and	a, #0x80
 	mov	r11, a
 	movw	r8, #0
-	mov	r10, #0
+	clrb	r10
 	ret
 	
 1:	
@@ -930,7 +930,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+0], ax
 	movw	[sp+2], ax
 	movw	[sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S	(revision 3174)
+++ libgcc/config/rl78/mulsi3.S	(working copy)
@@ -148,7 +148,7 @@
 	movw	ax, bc
 
 .Lmul_hisi_top:
-	movw	bc, #0
+	clrw	bc
 
 .Lmul_hisi_loop:
 	shrw	ax, 1

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH: RL78] Optimize libgcc routines using clrw and clrb
  2016-04-05  8:08 [PATCH: RL78] Optimize libgcc routines using clrw and clrb Kaushik Phatak
@ 2016-04-06 18:13 ` DJ Delorie
  0 siblings, 0 replies; 3+ messages in thread
From: DJ Delorie @ 2016-04-06 18:13 UTC (permalink / raw)
  To: Kaushik Phatak; +Cc: gcc-patches


Kaushik Phatak <Kaushik.Phatak@kpit.com> writes:
> 2016-04-06  Kaushik Phatak <kaushik.phatak@kpit.com>
>
> 	* config/rl78/bit-count.S: Use clrw/clrb where possible.
> 	* config/rl78/cmpsi2.S: Likewise.
> 	* config/rl78/divmodhi.S Likewise.
> 	* config/rl78/divmodsi.S Likewise.
> 	* config/rl78/fpbit-sf.S Likewise.
> 	* config/rl78/fpmath-sf.S Likewise.
> 	* config/rl78/mulsi3.S Likewise.

This patch is fine, please apply once gcc is in stage 1 again.

Thanks!

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH: RL78] Optimize libgcc routines using clrw and clrb
@ 2016-02-05 12:57 Kaushik Phatak
  0 siblings, 0 replies; 3+ messages in thread
From: Kaushik Phatak @ 2016-02-05 12:57 UTC (permalink / raw)
  To: 'gcc-patches@gcc.gnu.org'; +Cc: nick clifton (nickc@redhat.com)

[-- Attachment #1: Type: text/plain, Size: 6998 bytes --]

Hi,
Please find below a simple patch which optimizes the loading of an immediate value by using the clrw or clrb
instruction when 0x00 is being loaded into the register.
The patch replaces the movw/mov instructions with the smaller clrw/clrb instructions.
The clrw and clrb instructions generate only 1 byte of opcode, compared to 3 and 2 bytes for movw and mov respectively.

With this patch there is a total code size improvement of about 94 bytes in these libgcc routines.

The following routines have improved code size,
___mulsi3   : 2 bytes
___divsi3   : 20 bytes
___modsi3   : 20 bytes
___divhi3   : 10 bytes
___modhi3   : 10 bytes
___parityqi_internal : 2 bytes
__int_cmpsf : 2 bytes
___fixsfsi  : 5 bytes
___fixunssfsi : 2 bytes
___floatsisf  : 6 bytes
_int_unpack_sf : 1 byte
___addsf3 : 5 bytes
__rl78_int_pack_a_r8 : 2 bytes
___mulsf3  : 2 bytes
___divsf3  : 3 bytes
__gcc_bcmp :  2 bytes


I have also attached a draft version of a similar patch (rl78_libgcc_optimize_draft.patch), which goes further and
replaces each movw of an immediate zero to other saddr registers with 2 instructions, e.g.
 START_FUNC ___modhi3
        ;; r8 = 4[sp] % 6[sp]
-       movw    de, #0
+       clrw    ax
+       movw    de,ax
        mov     a, [sp+5]

This patch improves code size by 1 byte for each such substitution; however, it does add an extra clock cycle.

We may consider this patch if we are purely looking for code size improvement, assuming the libraries
are built with -Os. It gives a total code size improvement of 134 bytes.

Patch1: rl78_libgcc_optimize_clrw.patch - 94 bytes improvement in code size.
Patch2: rl78_libgcc_optimize_draft.patch - 134 bytes improvement in code size.

Kindly review this patch and let me know what you think.
This is regression tested for rl78 -msim.

Best Regards,
Kaushik

p.s. Kindly ignore any disclaimers at end of this e-mail as they are auto-inserted.
Apologies for the same.

2016-02-05  Kaushik Phatak <kaushik.phatak@kpit.com>

	* config/rl78/bit-count.S: Use clrw/clrb where possible.
	* config/rl78/cmpsi2.S: Likewise.
	* config/rl78/divmodhi.S: Likewise.
	* config/rl78/divmodsi.S: Likewise.
	* config/rl78/fpbit-sf.S: Likewise.
	* config/rl78/fpmath-sf.S: Likewise.
	* config/rl78/mulsi3.S: Likewise.
		
Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S	(revision 3174)
+++ libgcc/config/rl78/bit-count.S	(working copy)
@@ -139,7 +139,7 @@
 	xor1	cy, a.5
 	xor1	cy, a.6
 	xor1	cy, a.7
-	movw	ax, #0
+	clrw	ax
 	bnc	$1f
 	incw	ax
 1:
@@ -190,7 +190,7 @@
 	movw	ax, sp
 	addw	ax, #4
 	movw	hl, ax
-	mov	a, #0
+	clrb	a
 1:
 	xch	a, b
 	mov	a, [hl]
@@ -207,7 +207,7 @@
 	bnz	$1b
 
 	mov	x, a
-	mov	a, #0
+	clrb	a
 	movw	r8, ax
 	ret	
 END_FUNC	___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S	(revision 3174)
+++ libgcc/config/rl78/cmpsi2.S	(working copy)
@@ -162,8 +162,8 @@
 
 	;; They differ.  Subtract *S2 from *S1 and return as the result.
 	mov	x, a
-	mov	a, #0
-	mov	r9, #0
+	clrb	a
+	clrb	r9
 	subw	ax, r8
 1:
 	movw	r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 3174)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 3174)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S	(working copy)
@@ -117,7 +117,7 @@
 	call	$!__int_iszero
 	bnz	$2f
 	;; At this point, both args are zero.
-	mov	a, #0
+	clrb	a
 	ret
 
 2:
@@ -151,7 +151,7 @@
 	bc	$ybig_cmpsf	; branch if X < Y
 	bnz	$xbig_cmpsf	; branch if X > Y
 
-	mov	a, #0
+	clrb	a
 	ret
 
 xbig_cmpsf:			; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
 	movw	r10, #0x7fff
 	ret
 	;; -inf
-2:	mov	r8, #0
+2:	clrb	r8
 	mov	r10, #0x8000
 	ret
 	
@@ -302,10 +302,10 @@
 	clr1	a.7
 	call	$!__int_fixunssfsi
 
-	movw	ax, #0
+	clrw	ax
 	subw	ax, r8
 	movw	r8, ax
-	movw	ax, #0
+	clrw	ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
 	set1	a.7
 
 	;; Clear B:C:R12:R13
-	movw	bc, #0
+	clrw	bc
 	movw	r12, #0
 
 	;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
 	;; If negative convert to positive ...
 	movw 	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, bc
 	movw	bc, ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, hl
@@ -533,7 +533,7 @@
 	bnz	$1f
 	movw	ax, bc
 	cmpw	ax, #0
-	movw	ax, #0
+	clrw	ax
 	bnz	$1f
 
 	;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S	(working copy)
@@ -87,7 +87,7 @@
 	or	a, #0x80
 	mov	A_FRAC_H, a
 
-	mov	a, #0
+	clrb	a
 	mov	A_FRAC_HH, a
 
 	;; rounding-bit-shift
@@ -273,7 +273,7 @@
 	;; "zero out" b
 	movw	ax, A_EXP
 	movw	B_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	B_FRAC_L, ax
 	movw	B_FRAC_H, ax
 	br	$5f
@@ -281,7 +281,7 @@
 	;; "zero out" a
 	movw	ax, B_EXP
 	movw	A_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	A_FRAC_L, ax
 	movw	A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
 	bt	a.7, $.L706
 	
 	;; subtraction was positive
-	mov	a, #0
+	clrb	a
 	mov	A_SIGN, a
 	br	$.L712
 
@@ -543,7 +543,7 @@
 	or	a, A_FRAC_H
 	or	a, A_FRAC_HH
 	bnz	$1f
-	movw	ax, #0
+	clrw	ax
 	movw	A_EXP, ax
 1:	
 	mov	a, A_FRAC_H
@@ -682,7 +682,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+4], ax
 	movw	[sp+6], ax
 	movw	[sp+12], ax
@@ -867,7 +867,7 @@
 	and	a, #0x80
 	mov	r11, a
 	movw	r8, #0
-	mov	r10, #0
+	clrb	r10
 	ret
 	
 1:	
@@ -930,7 +930,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+0], ax
 	movw	[sp+2], ax
 	movw	[sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S	(revision 3174)
+++ libgcc/config/rl78/mulsi3.S	(working copy)
@@ -148,7 +148,7 @@
 	movw	ax, bc
 
 .Lmul_hisi_top:
-	movw	bc, #0
+	clrw	bc
 
 .Lmul_hisi_loop:
 	shrw	ax, 1


[-- Attachment #2: rl78_libgcc_optimize_draft.patch --]
[-- Type: application/octet-stream, Size: 8371 bytes --]

Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S	(revision 3174)
+++ libgcc/config/rl78/bit-count.S	(working copy)
@@ -139,7 +139,7 @@
 	xor1	cy, a.5
 	xor1	cy, a.6
 	xor1	cy, a.7
-	movw	ax, #0
+	clrw	ax
 	bnc	$1f
 	incw	ax
 1:
@@ -190,7 +190,7 @@
 	movw	ax, sp
 	addw	ax, #4
 	movw	hl, ax
-	mov	a, #0
+	clrb	a
 1:
 	xch	a, b
 	mov	a, [hl]
@@ -207,7 +207,7 @@
 	bnz	$1b
 
 	mov	x, a
-	mov	a, #0
+	clrb	a
 	movw	r8, ax
 	ret	
 END_FUNC	___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S	(revision 3174)
+++ libgcc/config/rl78/cmpsi2.S	(working copy)
@@ -131,7 +131,8 @@
 	;; SIZE is at [sp+8]
 	;; Result in r8/r9
 	
-        movw	r10, #0
+	clrw	ax
+	movw	r10,ax
 1:
 	;; Compare R10 against the SIZE parameter
         movw	ax, [sp+8]
@@ -162,8 +163,8 @@
 
 	;; They differ.  Subtract *S2 from *S1 and return as the result.
 	mov	x, a
-	mov	a, #0
-	mov	r9, #0
+	clrb	a
+	clrb	r9
 	subw	ax, r8
 1:
 	movw	r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 3174)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -413,7 +413,8 @@
 
 num_lt_den\which:
 	.if \need_result
-	movw	r8, #0
+	clrw 	ax
+	movw    r8,ax
 	.else
 	movw	ax, [sp+8]
 	movw	r8, ax
@@ -455,9 +456,11 @@
 	cmpw	ax, #0
 	bnz	$den_not_zero\which
 	.if \need_result
-	movw    quot, #0
+	clrw	ax
+	movw	quot,ax
 	.else
-	movw	num, #0
+	clrw	ax
+	movw	num,ax
 	.endif
 	ret
 
@@ -464,7 +467,8 @@
 den_not_zero\which:
 	.if \need_result
 	;; zero out quot
-	movw	quot, #0
+	clrw	ax
+	movw	quot,ax
 	.endif
 
 	;; initialize bit to 1
@@ -576,7 +580,7 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw    ax
 	subw	ax, [hl]
 	movw	[hl], ax
 .endm
@@ -585,7 +589,8 @@
 
 START_FUNC ___divhi3
 	;; r8 = 4[sp] / 6[sp]
-	movw	de, #0
+	clrw	ax
+	movw	de,ax
 	mov	a, [sp+5]
 	mov1	cy, a.7
 	bc	$div_signed_num
@@ -640,7 +645,8 @@
 
 START_FUNC ___modhi3
 	;; r8 = 4[sp] % 6[sp]
-	movw	de, #0
+	clrw	ax
+	movw	de,ax
 	mov	a, [sp+5]
 	mov1	cy, a.7
 	bc	$mod_signed_num
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 3174)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -604,8 +604,9 @@
 
 num_lt_den\which:
 	.if \need_result
-	movw	r8, #0
-	movw	r10, #0
+	clrw	ax
+	movw	r8, ax
+	movw	r10, ax
 	.else
 	movw	ax, [sp+8]
 	movw	r8, ax
@@ -617,11 +618,13 @@
 shift_den_bit16\which:
 	movw	ax, denL
 	movw	denH, ax
-	movw	denL, #0
+	clrw	ax
+	movw	denL, ax
 	.if \need_result
 	movw	ax, bitL
 	movw	bitH, ax
-	movw	bitL, #0
+	clrw	ax
+	movw	bitL, ax
 	.else
 	mov	a, bit
 	add	a, #16
@@ -687,13 +690,14 @@
 	or	a, denB2
 	or	a, denB3	; not x
 	cmpw	ax, #0
+	clrw	ax
 	bnz	$den_not_zero\which
 	.if \need_result
-	movw	quotL, #0
-	movw	quotH, #0
+	movw	quotL, ax
+	movw	quotH, ax
 	.else
-	movw	numL, #0
-	movw	numH, #0
+	movw	numL, ax
+	movw	numH, ax
 	.endif
 	br	$!main_loop_done_himode\which
 
@@ -700,8 +704,8 @@
 den_not_zero\which:
 	.if \need_result
 	;; zero out quot
-	movw	quotL, #0
-	movw	quotH, #0
+	movw	quotL, ax
+	movw	quotH, ax
 	.endif
 
 	;; initialize bit to 1
@@ -952,10 +956,10 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, [hl+2]
@@ -966,7 +970,8 @@
 
 START_FUNC ___divsi3
 	;; r8 = 4[sp] / 8[sp]
-	movw	de, #0
+	clrw	ax
+	movw	de, ax
 	mov	a, [sp+7]
 	mov1	cy, a.7
 	bc	$div_signed_num
@@ -1022,7 +1027,8 @@
 
 START_FUNC ___modsi3
 	;; r8 = 4[sp] % 8[sp]
-	movw	de, #0
+	clrw	ax
+	movw	de, ax
 	mov	a, [sp+7]
 	mov1	cy, a.7
 	bc	$mod_signed_num
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S	(working copy)
@@ -117,7 +117,7 @@
 	call	$!__int_iszero
 	bnz	$2f
 	;; At this point, both args are zero.
-	mov	a, #0
+	clrb	a
 	ret
 
 2:
@@ -151,7 +151,7 @@
 	bc	$ybig_cmpsf	; branch if X < Y
 	bnz	$xbig_cmpsf	; branch if X > Y
 
-	mov	a, #0
+	clrb	a
 	ret
 
 xbig_cmpsf:			; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
 	movw	r10, #0x7fff
 	ret
 	;; -inf
-2:	mov	r8, #0
+2:	clrb	r8
 	mov	r10, #0x8000
 	ret
 	
@@ -302,10 +302,10 @@
 	clr1	a.7
 	call	$!__int_fixunssfsi
 
-	movw	ax, #0
+	clrw	ax
 	subw	ax, r8
 	movw	r8, ax
-	movw	ax, #0
+	clrw	ax
         sknc
         decw    ax
         subw    ax, r10
@@ -355,8 +355,9 @@
 	bnc	$1f
 
 	;; Return zero.
-2:	movw	r8, #0
-	movw	r10, #0
+2:	clrw	ax
+	movw	r8, ax
+	movw	r10, ax
 	ret
 
 	;; An exponent of -1 is either a NaN or infinity.
@@ -410,7 +411,7 @@
 	set1	a.7
 
 	;; Clear B:C:R12:R13
-	movw	bc, #0
+	clrw	bc
 	movw	r12, #0
 
 	;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +483,10 @@
 
 	;; If negative convert to positive ...
 	movw 	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, bc
 	movw	bc, ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, hl
@@ -533,7 +534,7 @@
 	bnz	$1f
 	movw	ax, bc
 	cmpw	ax, #0
-	movw	ax, #0
+	clrw	ax
 	bnz	$1f
 
 	;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S	(working copy)
@@ -87,7 +87,7 @@
 	or	a, #0x80
 	mov	A_FRAC_H, a
 
-	mov	a, #0
+	clrb	a
 	mov	A_FRAC_HH, a
 
 	;; rounding-bit-shift
@@ -273,7 +273,7 @@
 	;; "zero out" b
 	movw	ax, A_EXP
 	movw	B_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	B_FRAC_L, ax
 	movw	B_FRAC_H, ax
 	br	$5f
@@ -281,7 +281,7 @@
 	;; "zero out" a
 	movw	ax, B_EXP
 	movw	A_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	A_FRAC_L, ax
 	movw	A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
 	bt	a.7, $.L706
 	
 	;; subtraction was positive
-	mov	a, #0
+	clrb	a
 	mov	A_SIGN, a
 	br	$.L712
 
@@ -490,7 +490,8 @@
 	or	a, #0x7f
 	mov	x, #0x80
 	movw	r10, ax
-	movw	r8, #0
+	clrw	ax
+	movw	r8, ax
 	ret
 
 1:
@@ -543,7 +544,7 @@
 	or	a, A_FRAC_H
 	or	a, A_FRAC_HH
 	bnz	$1f
-	movw	ax, #0
+	clrw	ax
 	movw	A_EXP, ax
 1:	
 	mov	a, A_FRAC_H
@@ -682,7 +683,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+4], ax
 	movw	[sp+6], ax
 	movw	[sp+12], ax
@@ -866,8 +867,9 @@
 	xor	a, b
 	and	a, #0x80
 	mov	r11, a
-	movw	r8, #0
-	mov	r10, #0
+	clrw	ax
+	movw	r8, ax
+	clrb	r10
 	ret
 	
 1:	
@@ -885,7 +887,8 @@
 	xor	a, b
 	or	a, #0x7f
 	mov	r11, a
-	movw	r8, #0
+	clrw	ax
+	movw	r8, ax
 	mov	r10, #0x80
 	ret
 1:	
@@ -930,7 +933,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+0], ax
 	movw	[sp+2], ax
 	movw	[sp+12], ax
Index: libgcc/config/rl78/lshrsi3.S
===================================================================
--- libgcc/config/rl78/lshrsi3.S	(revision 3174)
+++ libgcc/config/rl78/lshrsi3.S	(working copy)
@@ -46,8 +46,9 @@
 	bc	$.Lcount_is_normal
 
 	;; count is out of bounds, just return zero.
-	movw	r8, #0
-	movw	r10, #0
+	clrw	ax
+	movw	r8, ax
+	movw	r10, ax
 	ret
 
 .Lcount_is_normal:
@@ -66,7 +67,8 @@
 	bf	a.4, $.Lcount_lt_16
 
 	;; count >= 16, shift 16 at a time.
-	movw	r10, #0
+	clrw	ax
+	movw	r10, ax
 	movw	ax, [sp+6]
 	movw	r8, ax
 	mov	a, b
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S	(revision 3174)
+++ libgcc/config/rl78/mulsi3.S	(working copy)
@@ -148,7 +148,7 @@
 	movw	ax, bc
 
 .Lmul_hisi_top:
-	movw	bc, #0
+	clrw	bc
 
 .Lmul_hisi_loop:
 	shrw	ax, 1
@@ -242,7 +242,8 @@
 ;----------------------------------------------------------------------
 
 START_FUNC ___mulhi3
-	movw	r8, #0
+	clrw	ax
+	movw	r8, ax
 	movw	ax, [sp+6]
 	movw	bc, ax
 	movw	ax, [sp+4]
Index: libgcc/config/rl78/signbit.S
===================================================================
--- libgcc/config/rl78/signbit.S	(revision 3174)
+++ libgcc/config/rl78/signbit.S	(working copy)
@@ -42,7 +42,8 @@
 	;; X is at [sp+4]..[SP+7]
 	;; result is in R8..R9
 
-	movw	r8, #0
+	clrw	ax
+	movw	r8, ax
 	mov	a, [sp+7]
 	mov1	cy, a.7
 	sknc
@@ -56,7 +57,8 @@
 	;; X is at [sp+4]..[SP+7]
 	;; result is in R8..R9
 
-	movw	r8, #0
+	clrw	ax
+	movw	r8, ax
 	mov	a, [sp+11]
 	mov1	cy, a.7
 	sknc

[-- Attachment #3: rl78_libgcc_optimize_clrw.patch --]
[-- Type: application/octet-stream, Size: 4440 bytes --]

Index: libgcc/config/rl78/bit-count.S
===================================================================
--- libgcc/config/rl78/bit-count.S	(revision 3174)
+++ libgcc/config/rl78/bit-count.S	(working copy)
@@ -139,7 +139,7 @@
 	xor1	cy, a.5
 	xor1	cy, a.6
 	xor1	cy, a.7
-	movw	ax, #0
+	clrw	ax
 	bnc	$1f
 	incw	ax
 1:
@@ -190,7 +190,7 @@
 	movw	ax, sp
 	addw	ax, #4
 	movw	hl, ax
-	mov	a, #0
+	clrb	a
 1:
 	xch	a, b
 	mov	a, [hl]
@@ -207,7 +207,7 @@
 	bnz	$1b
 
 	mov	x, a
-	mov	a, #0
+	clrb	a
 	movw	r8, ax
 	ret	
 END_FUNC	___popcountqi_internal
Index: libgcc/config/rl78/cmpsi2.S
===================================================================
--- libgcc/config/rl78/cmpsi2.S	(revision 3174)
+++ libgcc/config/rl78/cmpsi2.S	(working copy)
@@ -162,8 +162,8 @@
 
 	;; They differ.  Subtract *S2 from *S1 and return as the result.
 	mov	x, a
-	mov	a, #0
-	mov	r9, #0
+	clrb	a
+	clrb	r9
 	subw	ax, r8
 1:
 	movw	r8, ax
Index: libgcc/config/rl78/divmodhi.S
===================================================================
--- libgcc/config/rl78/divmodhi.S	(revision 3174)
+++ libgcc/config/rl78/divmodhi.S	(working copy)
@@ -576,7 +576,7 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
 .endm
Index: libgcc/config/rl78/divmodsi.S
===================================================================
--- libgcc/config/rl78/divmodsi.S	(revision 3174)
+++ libgcc/config/rl78/divmodsi.S	(working copy)
@@ -952,10 +952,10 @@
 
 .macro NEG_AX
 	movw	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, [hl]
 	movw	[hl], ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, [hl+2]
Index: libgcc/config/rl78/fpbit-sf.S
===================================================================
--- libgcc/config/rl78/fpbit-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpbit-sf.S	(working copy)
@@ -117,7 +117,7 @@
 	call	$!__int_iszero
 	bnz	$2f
 	;; At this point, both args are zero.
-	mov	a, #0
+	clrb	a
 	ret
 
 2:
@@ -151,7 +151,7 @@
 	bc	$ybig_cmpsf	; branch if X < Y
 	bnz	$xbig_cmpsf	; branch if X > Y
 
-	mov	a, #0
+	clrb	a
 	ret
 
 xbig_cmpsf:			; |X| > |Y| so return A = 1 if pos, 0xff if neg
@@ -285,7 +285,7 @@
 	movw	r10, #0x7fff
 	ret
 	;; -inf
-2:	mov	r8, #0
+2:	clrb	r8
 	mov	r10, #0x8000
 	ret
 	
@@ -302,10 +302,10 @@
 	clr1	a.7
 	call	$!__int_fixunssfsi
 
-	movw	ax, #0
+	clrw	ax
 	subw	ax, r8
 	movw	r8, ax
-	movw	ax, #0
+	clrw	ax
         sknc
         decw    ax
         subw    ax, r10
@@ -410,7 +410,7 @@
 	set1	a.7
 
 	;; Clear B:C:R12:R13
-	movw	bc, #0
+	clrw	bc
 	movw	r12, #0
 
 	;; Shift bits from the mantissa (A:X:R10) into (B:C:R12:R13),
@@ -482,10 +482,10 @@
 
 	;; If negative convert to positive ...
 	movw 	hl, ax
-	movw	ax, #0
+	clrw	ax
 	subw	ax, bc
 	movw	bc, ax
-	movw	ax, #0
+	clrw	ax
 	sknc
 	decw	ax
 	subw	ax, hl
@@ -533,7 +533,7 @@
 	bnz	$1f
 	movw	ax, bc
 	cmpw	ax, #0
-	movw	ax, #0
+	clrw	ax
 	bnz	$1f
 
 	;; Return 0.0
Index: libgcc/config/rl78/fpmath-sf.S
===================================================================
--- libgcc/config/rl78/fpmath-sf.S	(revision 3174)
+++ libgcc/config/rl78/fpmath-sf.S	(working copy)
@@ -87,7 +87,7 @@
 	or	a, #0x80
 	mov	A_FRAC_H, a
 
-	mov	a, #0
+	clrb	a
 	mov	A_FRAC_HH, a
 
 	;; rounding-bit-shift
@@ -273,7 +273,7 @@
 	;; "zero out" b
 	movw	ax, A_EXP
 	movw	B_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	B_FRAC_L, ax
 	movw	B_FRAC_H, ax
 	br	$5f
@@ -281,7 +281,7 @@
 	;; "zero out" a
 	movw	ax, B_EXP
 	movw	A_EXP, ax
-	movw	ax, #0
+	clrw	ax
 	movw	A_FRAC_L, ax
 	movw	A_FRAC_H, ax
 
@@ -379,7 +379,7 @@
 	bt	a.7, $.L706
 	
 	;; subtraction was positive
-	mov	a, #0
+	clrb	a
 	mov	A_SIGN, a
 	br	$.L712
 
@@ -543,7 +543,7 @@
 	or	a, A_FRAC_H
 	or	a, A_FRAC_HH
 	bnz	$1f
-	movw	ax, #0
+	clrw	ax
 	movw	A_EXP, ax
 1:	
 	mov	a, A_FRAC_H
@@ -682,7 +682,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+4], ax
 	movw	[sp+6], ax
 	movw	[sp+12], ax
@@ -867,7 +867,7 @@
 	and	a, #0x80
 	mov	r11, a
 	movw	r8, #0
-	mov	r10, #0
+	clrb	r10
 	ret
 	
 1:	
@@ -930,7 +930,7 @@
 	movw	ax, B_FRAC_H
 	movw	[sp+10], ax
 
-	movw	ax, #0
+	clrw	ax
 	movw	[sp+0], ax
 	movw	[sp+2], ax
 	movw	[sp+12], ax
Index: libgcc/config/rl78/mulsi3.S
===================================================================
--- libgcc/config/rl78/mulsi3.S	(revision 3174)
+++ libgcc/config/rl78/mulsi3.S	(working copy)
@@ -148,7 +148,7 @@
 	movw	ax, bc
 
 .Lmul_hisi_top:
-	movw	bc, #0
+	clrw	bc
 
 .Lmul_hisi_loop:
 	shrw	ax, 1

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2016-04-06 18:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-05  8:08 [PATCH: RL78] Optimize libgcc routines using clrw and clrb Kaushik Phatak
2016-04-06 18:13 ` DJ Delorie
  -- strict thread matches above, loose matches on Subject: below --
2016-02-05 12:57 Kaushik Phatak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).