RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-09-04  2:16 Tony Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-09-04  2:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan

[-- Attachment #1: Type: text/plain, Size: 1162 bytes --]

Ping 2?

> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Thursday, August 28, 2014 2:02 PM
> To: 'gcc-patches@gcc.gnu.org'
> Cc: Richard Earnshaw; Ramana Radhakrishnan
> Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> 
> Ping?
> 
> > -----Original Message-----
> > From: Tony Wang [mailto:tony.wang@arm.com]
> > Sent: Thursday, August 21, 2014 2:15 PM
> > To: 'gcc-patches@gcc.gnu.org'
> > Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> >
> > Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> > generated when the two sections are too far away from each other. Also, I have verified, both manually and
> > with some test cases, that IP and PSR are not live at such points.
> >
> > gcc/libgcc/ChangeLog:
> > 2014-8-21   Tony Wang <tony.wang@arm.com>
> >
> >         * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> >         * config/arm/ieee754-df.S: Same with above
> >
> > BR,
> > Tony

[-- Attachment #2: libgcc_mul_div_code_size_reduction_2.diff --]
[-- Type: application/octet-stream, Size: 9360 bytes --]

diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index 406bb70..ecdd46f 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
 
 #ifdef L_arm_muldivdf3
 
-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
 ARM_FUNC_ALIAS aeabi_dmul muldf3
 	do_push	{r4, r5, r6, lr}
 
@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	COND(and,s,ne)	r5, ip, yh, lsr #20
 	teqne	r4, ip
 	teqne	r5, ip
-	bleq	LSYM(Lml_s)
+	bleq	Lml_s
 
 	@ Add exponents together
 	add	r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
 	mov	lr, #0
 	subs	r4, r4, #1
 
-LSYM(Lml_u):
+	FUNC_END aeabi_dmul
+	FUNC_END muldf3
+
+ARM_SYM_START Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	Lml_o
 
 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	xl, xl, r3, lsr #31
 	RETLDM	"r4, r5, r6"
+	SYM_END Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
 	teq	r4, #0
 	bne	2f
 	and	r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
 	beq	3b
 	orr	yh, yh, r6
 	RET
+	SYM_END Lml_d
 
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
 	@ Isolate the INF and NAN cases away
 	teq	r4, ip
 	and	r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	Lml_d
+	SYM_END Lml_s
 
 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
 	eor	xh, xh, yh
 	and	xh, xh, #0x80000000
 	mov	xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
 	moveq	xl, yl
 	moveq	xh, yh
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	Lml_n		@ 0 * INF or INF * 0 -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r6, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	Lml_n		@ NAN * <anything> -> NAN
 1:	teq	r5, ip
-	bne	LSYM(Lml_i)
+	bne	Lml_i
 	orrs	r6, yl, yh, lsl #12
 	do_it	ne, t
 	movne	xl, yl
 	movne	xh, yh
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	Lml_n		@ <anything> * NAN -> NAN
+	SYM_END Lml_z
 
 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
 	eor	xh, xh, yh
+	SYM_END Lml_i
 
 	@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
 	and	xh, xh, #0x80000000
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f00000
 	mov	xl, #0
 	RETLDM	"r4, r5, r6"
+	SYM_END Lml_o
 
 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f80000
 	RETLDM	"r4, r5, r6"
+	SYM_END Lml_n
 
-	FUNC_END aeabi_dmul
-	FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
 ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	
 	do_push	{r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	subs	ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
 	orr	xh, xh, #0x00100000
 	mov	lr, #0
 	subs	r4, r4, #1
-	b	LSYM(Lml_u)
+	b	Lml_u
 
 	@ Result mightt need to be denormalized: put remainder bits
 	@ in lr for rounding considerations.
 LSYM(Ldv_u):
 	orr	lr, r5, r6
-	b	LSYM(Lml_u)
+	b	Lml_u
 
 	@ One or both arguments is either INF, NAN or zero.
 LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
 	teq	r4, ip
 	do_it	eq
 	teqeq	r5, ip
-	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
+	beq	Lml_n		@ INF/NAN / INF/NAN -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r4, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	Lml_n		@ NAN / <anything> -> NAN
 	teq	r5, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	Lml_i		@ INF / <anything> -> INF
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	Lml_n		@ INF / (INF or NAN) -> NAN
 1:	teq	r5, ip
 	bne	2f
 	orrs	r5, yl, yh, lsl #12
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	Lml_z		@ <anything> / INF -> 0
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	Lml_n		@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	Lml_d
 	@ One or both arguments are 0.
 	orrs	r4, xl, xh, lsl #1
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	Lml_i		@ <non_zero> / 0 -> INF
 	orrs	r5, yl, yh, lsl #1
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	Lml_z		@ 0 / <non_zero> -> 0
+	b	Lml_n		@ 0 / 0 -> NAN
 
 	FUNC_END aeabi_ddiv
 	FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index c9bca4d..45bada4 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
 
 #ifdef L_arm_muldivsf3
 
-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
 ARM_FUNC_ALIAS aeabi_fmul mulsf3
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
 	COND(and,s,ne)	r3, ip, r1, lsr #23
 	teqne	r2, ip
 	teqne	r3, ip
-	beq	LSYM(Lml_s)
+	beq	Lml_s
 LSYM(Lml_x):
 
 	@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
 	@ Apply exponent bias, check for under/overflow.
 	sbc	r2, r2, #127
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
 	mov	r3, #0
 	subs	r2, r2, #1
 
-LSYM(Lml_u):
+	FUNC_END aeabi_fmul
+	FUNC_END mulsf3
+
+ARM_SYM_START Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	Lml_o
 
 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	r0, r0, ip, lsr #31
 	RET
+	SYM_END Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
 	teq	r2, #0
 	and	ip, r0, #0x80000000
 1:	do_it	eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
 	beq	2b
 	orr	r1, r1, ip
 	b	LSYM(Lml_x)
+	SYM_END Lml_d
 
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
 	@ Isolate the INF and NAN cases away
 	and	r3, ip, r1, lsr #23
 	teq	r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
 	bics	ip, r0, #0x80000000
 	do_it	ne
 	COND(bic,s,ne)	ip, r1, #0x80000000
-	bne	LSYM(Lml_d)
+	bne	Lml_d
+	SYM_END Lml_s
 
 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
 	eor	r0, r0, r1
 	bic	r0, r0, #0x7fffffff
 	RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
 	moveq	r0, r1
 	teqne	r1, #0x0
 	teqne	r1, #0x80000000
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	Lml_n		@ 0 * INF or INF * 0 -> NAN
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	Lml_n		@ NAN * <anything> -> NAN
 1:	teq	r3, ip
-	bne	LSYM(Lml_i)
+	bne	Lml_i
 	movs	r3, r1, lsl #9
 	do_it	ne
 	movne	r0, r1
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	Lml_n		@ <anything> * NAN -> NAN
+	SYM_END Lml_z
 
 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
 	eor	r0, r0, r1
+	SYM_END Lml_i
 
 	@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
 	and	r0, r0, #0x80000000
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00800000
 	RET
+	SYM_END Lml_o
 
 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00c00000
 	RET
+	SYM_END Lml_n
 
-	FUNC_END aeabi_fmul
-	FUNC_END mulsf3
 
-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
 ARM_FUNC_ALIAS aeabi_fdiv divsf3
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):
 
 	@ Check exponent for under/overflow.
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
 	orr	r0, r0, #0x00800000
 	mov	r3, #0
 	subs	r2, r2, #1
-	b	LSYM(Lml_u)
+	b	Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	Lml_n		@ NAN / <anything> -> NAN
 	teq	r3, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	Lml_i		@ INF / <anything> -> INF
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	Lml_n		@ INF / (INF or NAN) -> NAN
 1:	teq	r3, ip
 	bne	2f
 	movs	r3, r1, lsl #9
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	Lml_z		@ <anything> / INF -> 0
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	Lml_n		@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	bics	ip, r0, #0x80000000
 	do_it	ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
 	bne	LSYM(Ldv_d)
 	@ One or both arguments are zero.
 	bics	r2, r0, #0x80000000
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	Lml_i		@ <non_zero> / 0 -> INF
 	bics	r3, r1, #0x80000000
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	Lml_z		@ 0 / <non_zero> -> 0
+	b	Lml_n		@ 0 / 0 -> NAN
 
 	FUNC_END aeabi_fdiv
 	FUNC_END divsf3

^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-09-26  2:11 Tony Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-09-26  2:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan

Ping?

> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Tuesday, September 16, 2014 11:01 AM
> To: 'gcc-patches@gcc.gnu.org'
> Cc: Richard Earnshaw; Ramana Radhakrishnan
> Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> 
> Ping?
> 
> > -----Original Message-----
> > From: Tony Wang [mailto:tony.wang@arm.com]
> > Sent: Thursday, September 04, 2014 10:16 AM
> > To: 'gcc-patches@gcc.gnu.org'
> > Cc: Richard Earnshaw; Ramana Radhakrishnan
> > Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> >
> > Ping 2?
> >
> > > -----Original Message-----
> > > From: Tony Wang [mailto:tony.wang@arm.com]
> > > Sent: Thursday, August 28, 2014 2:02 PM
> > > To: 'gcc-patches@gcc.gnu.org'
> > > Cc: Richard Earnshaw; Ramana Radhakrishnan
> > > Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> > >
> > > Ping?
> > >
> > > > -----Original Message-----
> > > > From: Tony Wang [mailto:tony.wang@arm.com]
> > > > Sent: Thursday, August 21, 2014 2:15 PM
> > > > To: 'gcc-patches@gcc.gnu.org'
> > > > Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> > > >
> > > > Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> > > > generated when the two sections are too far away from each other. Also, I have verified, both manually
> > > > and with some test cases, that IP and PSR are not live at such points.
> > > >
> > > > gcc/libgcc/ChangeLog:
> > > > 2014-8-21   Tony Wang <tony.wang@arm.com>
> > > >
> > > >         * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> > > >         * config/arm/ieee754-df.S: Same with above
> > > >
> > > > BR,
> > > > Tony



^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-09-16  3:00 Tony Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-09-16  3:00 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan

Ping?

> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Thursday, September 04, 2014 10:16 AM
> To: 'gcc-patches@gcc.gnu.org'
> Cc: Richard Earnshaw; Ramana Radhakrishnan
> Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> 
> Ping 2?
> 
> > -----Original Message-----
> > From: Tony Wang [mailto:tony.wang@arm.com]
> > Sent: Thursday, August 28, 2014 2:02 PM
> > To: 'gcc-patches@gcc.gnu.org'
> > Cc: Richard Earnshaw; Ramana Radhakrishnan
> > Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> >
> > Ping?
> >
> > > -----Original Message-----
> > > From: Tony Wang [mailto:tony.wang@arm.com]
> > > Sent: Thursday, August 21, 2014 2:15 PM
> > > To: 'gcc-patches@gcc.gnu.org'
> > > Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> > >
> > > Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> > > generated when the two sections are too far away from each other. Also, I have verified, both manually
> > > and with some test cases, that IP and PSR are not live at such points.
> > >
> > > gcc/libgcc/ChangeLog:
> > > 2014-8-21   Tony Wang <tony.wang@arm.com>
> > >
> > >         * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> > >         * config/arm/ieee754-df.S: Same with above
> > >
> > > BR,
> > > Tony



^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-08-28  6:02 Tony Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-08-28  6:02 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan

Ping?

> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Thursday, August 21, 2014 2:15 PM
> To: 'gcc-patches@gcc.gnu.org'
> Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> 
> Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> generated when the two sections are too far away from each other. Also, I have verified, both manually and
> with some test cases, that IP and PSR are not live at such points.
> 
> gcc/libgcc/ChangeLog:
> 2014-8-21   Tony Wang <tony.wang@arm.com>
> 
>         * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
>         * config/arm/ieee754-df.S: Same with above
> 
> BR,
> Tony


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-08-21  6:14 Tony Wang
  0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-08-21  6:14 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 503 bytes --]

Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are generated
when the two sections are too far away from each other. Also, I have verified, both manually and with some
test cases, that IP and PSR are not live at such points.

gcc/libgcc/ChangeLog:
2014-08-21  Tony Wang  <tony.wang@arm.com>

        * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
        * config/arm/ieee754-df.S: Likewise.

BR,
Tony

[-- Attachment #2: libgcc_mul_div_code_size_reduction_2.diff --]
[-- Type: application/octet-stream, Size: 9360 bytes --]

diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index 406bb70..ecdd46f 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
 
 #ifdef L_arm_muldivdf3
 
-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
 ARM_FUNC_ALIAS aeabi_dmul muldf3
 	do_push	{r4, r5, r6, lr}
 
@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	COND(and,s,ne)	r5, ip, yh, lsr #20
 	teqne	r4, ip
 	teqne	r5, ip
-	bleq	LSYM(Lml_s)
+	bleq	Lml_s
 
 	@ Add exponents together
 	add	r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
 	mov	lr, #0
 	subs	r4, r4, #1
 
-LSYM(Lml_u):
+	FUNC_END aeabi_dmul
+	FUNC_END muldf3
+
+ARM_SYM_START Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	Lml_o
 
 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	xl, xl, r3, lsr #31
 	RETLDM	"r4, r5, r6"
+	SYM_END Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
 	teq	r4, #0
 	bne	2f
 	and	r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
 	beq	3b
 	orr	yh, yh, r6
 	RET
+	SYM_END Lml_d
 
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
 	@ Isolate the INF and NAN cases away
 	teq	r4, ip
 	and	r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	Lml_d
+	SYM_END Lml_s
 
 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
 	eor	xh, xh, yh
 	and	xh, xh, #0x80000000
 	mov	xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
 	moveq	xl, yl
 	moveq	xh, yh
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	Lml_n		@ 0 * INF or INF * 0 -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r6, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	Lml_n		@ NAN * <anything> -> NAN
 1:	teq	r5, ip
-	bne	LSYM(Lml_i)
+	bne	Lml_i
 	orrs	r6, yl, yh, lsl #12
 	do_it	ne, t
 	movne	xl, yl
 	movne	xh, yh
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	Lml_n		@ <anything> * NAN -> NAN
+	SYM_END Lml_z
 
 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
 	eor	xh, xh, yh
+	SYM_END Lml_i
 
 	@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
 	and	xh, xh, #0x80000000
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f00000
 	mov	xl, #0
 	RETLDM	"r4, r5, r6"
+	SYM_END Lml_o
 
 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f80000
 	RETLDM	"r4, r5, r6"
+	SYM_END Lml_n
 
-	FUNC_END aeabi_dmul
-	FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
 ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	
 	do_push	{r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	subs	ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
 	orr	xh, xh, #0x00100000
 	mov	lr, #0
 	subs	r4, r4, #1
-	b	LSYM(Lml_u)
+	b	Lml_u
 
 	@ Result mightt need to be denormalized: put remainder bits
 	@ in lr for rounding considerations.
 LSYM(Ldv_u):
 	orr	lr, r5, r6
-	b	LSYM(Lml_u)
+	b	Lml_u
 
 	@ One or both arguments is either INF, NAN or zero.
 LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
 	teq	r4, ip
 	do_it	eq
 	teqeq	r5, ip
-	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
+	beq	Lml_n		@ INF/NAN / INF/NAN -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r4, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	Lml_n		@ NAN / <anything> -> NAN
 	teq	r5, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	Lml_i		@ INF / <anything> -> INF
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	Lml_n		@ INF / (INF or NAN) -> NAN
 1:	teq	r5, ip
 	bne	2f
 	orrs	r5, yl, yh, lsl #12
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	Lml_z		@ <anything> / INF -> 0
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	Lml_n		@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	Lml_d
 	@ One or both arguments are 0.
 	orrs	r4, xl, xh, lsl #1
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	Lml_i		@ <non_zero> / 0 -> INF
 	orrs	r5, yl, yh, lsl #1
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	Lml_z		@ 0 / <non_zero> -> 0
+	b	Lml_n		@ 0 / 0 -> NAN
 
 	FUNC_END aeabi_ddiv
 	FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index c9bca4d..45bada4 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
 
 #ifdef L_arm_muldivsf3
 
-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
 ARM_FUNC_ALIAS aeabi_fmul mulsf3
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
 	COND(and,s,ne)	r3, ip, r1, lsr #23
 	teqne	r2, ip
 	teqne	r3, ip
-	beq	LSYM(Lml_s)
+	beq	Lml_s
 LSYM(Lml_x):
 
 	@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
 	@ Apply exponent bias, check for under/overflow.
 	sbc	r2, r2, #127
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
 	mov	r3, #0
 	subs	r2, r2, #1
 
-LSYM(Lml_u):
+	FUNC_END aeabi_fmul
+	FUNC_END mulsf3
+
+ARM_SYM_START Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	Lml_o
 
 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	r0, r0, ip, lsr #31
 	RET
+	SYM_END Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
 	teq	r2, #0
 	and	ip, r0, #0x80000000
 1:	do_it	eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
 	beq	2b
 	orr	r1, r1, ip
 	b	LSYM(Lml_x)
+	SYM_END Lml_d
 
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
 	@ Isolate the INF and NAN cases away
 	and	r3, ip, r1, lsr #23
 	teq	r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
 	bics	ip, r0, #0x80000000
 	do_it	ne
 	COND(bic,s,ne)	ip, r1, #0x80000000
-	bne	LSYM(Lml_d)
+	bne	Lml_d
+	SYM_END Lml_s
 
 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
 	eor	r0, r0, r1
 	bic	r0, r0, #0x7fffffff
 	RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
 	moveq	r0, r1
 	teqne	r1, #0x0
 	teqne	r1, #0x80000000
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	Lml_n		@ 0 * INF or INF * 0 -> NAN
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	Lml_n		@ NAN * <anything> -> NAN
 1:	teq	r3, ip
-	bne	LSYM(Lml_i)
+	bne	Lml_i
 	movs	r3, r1, lsl #9
 	do_it	ne
 	movne	r0, r1
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	Lml_n		@ <anything> * NAN -> NAN
+	SYM_END Lml_z
 
 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
 	eor	r0, r0, r1
+	SYM_END Lml_i
 
 	@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
 	and	r0, r0, #0x80000000
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00800000
 	RET
+	SYM_END Lml_o
 
 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00c00000
 	RET
+	SYM_END Lml_n
 
-	FUNC_END aeabi_fmul
-	FUNC_END mulsf3
 
-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
 ARM_FUNC_ALIAS aeabi_fdiv divsf3
 
 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):
 
 	@ Check exponent for under/overflow.
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	Lml_u
 
 	@ Round the result, merge final exponent.
 	cmp	r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
 	orr	r0, r0, #0x00800000
 	mov	r3, #0
 	subs	r2, r2, #1
-	b	LSYM(Lml_u)
+	b	Lml_u
 
 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	Lml_n		@ NAN / <anything> -> NAN
 	teq	r3, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	Lml_i		@ INF / <anything> -> INF
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	Lml_n		@ INF / (INF or NAN) -> NAN
 1:	teq	r3, ip
 	bne	2f
 	movs	r3, r1, lsl #9
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	Lml_z		@ <anything> / INF -> 0
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	Lml_n		@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	bics	ip, r0, #0x80000000
 	do_it	ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
 	bne	LSYM(Ldv_d)
 	@ One or both arguments are zero.
 	bics	r2, r0, #0x80000000
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	Lml_i		@ <non_zero> / 0 -> INF
 	bics	r3, r1, #0x80000000
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	Lml_z		@ 0 / <non_zero> -> 0
+	b	Lml_n		@ 0 / 0 -> NAN
 
 	FUNC_END aeabi_fdiv
 	FUNC_END divsf3

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2014-09-26  2:11 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-04  2:16 [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc Tony Wang
  -- strict thread matches above, loose matches on Subject: below --
2014-09-26  2:11 Tony Wang
2014-09-16  3:00 Tony Wang
2014-08-28  6:02 Tony Wang
2014-08-21  6:14 Tony Wang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).