* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-09-04 2:16 Tony Wang
0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-09-04 2:16 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan
[-- Attachment #1: Type: text/plain, Size: 1162 bytes --]
Ping 2?
> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Thursday, August 28, 2014 2:02 PM
> To: 'gcc-patches@gcc.gnu.org'
> Cc: Richard Earnshaw; Ramana Radhakrishnan
> Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
>
> Ping?
>
> > -----Original Message-----
> > From: Tony Wang [mailto:tony.wang@arm.com]
> > Sent: Thursday, August 21, 2014 2:15 PM
> > To: 'gcc-patches@gcc.gnu.org'
> > Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> >
> > Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> > generated when the two sections are too far away from each other. Also, I have verified, both manually
> > and using some test cases, that IP and PSR are not live at those points.
> >
> > gcc/libgcc/ChangeLog:
> > 2014-8-21 Tony Wang <tony.wang@arm.com>
> >
> > * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> > * config/arm/ieee754-df.S: Same with above
> >
> > BR,
> > Tony
[-- Attachment #2: libgcc_mul_div_code_size_reduction_2.diff --]
[-- Type: application/octet-stream, Size: 9360 bytes --]
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index 406bb70..ecdd46f 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
#ifdef L_arm_muldivdf3
-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
ARM_FUNC_ALIAS aeabi_dmul muldf3
do_push {r4, r5, r6, lr}
@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
COND(and,s,ne) r5, ip, yh, lsr #20
teqne r4, ip
teqne r5, ip
- bleq LSYM(Lml_s)
+ bleq Lml_s
@ Add exponents together
add r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
subs ip, r4, #(254 - 1)
do_it hi
cmphi ip, #0x700
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
cmp lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
mov lr, #0
subs r4, r4, #1
-LSYM(Lml_u):
+ FUNC_END aeabi_dmul
+ FUNC_END muldf3
+
+ARM_SYM_START Lml_u
@ Overflow?
- bgt LSYM(Lml_o)
+ bgt Lml_o
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
do_it eq
biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
+ SYM_END Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
teq r4, #0
bne 2f
and r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
beq 3b
orr yh, yh, r6
RET
+ SYM_END Lml_d
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
@ Isolate the INF and NAN cases away
teq r4, ip
and r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
orrs r6, xl, xh, lsl #1
do_it ne
COND(orr,s,ne) r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
+ bne Lml_d
+ SYM_END Lml_s
@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
eor xh, xh, yh
and xh, xh, #0x80000000
mov xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
moveq xl, yl
moveq xh, yh
COND(orr,s,ne) r6, yl, yh, lsl #1
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ beq Lml_n @ 0 * INF or INF * 0 -> NAN
teq r4, ip
bne 1f
orrs r6, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+ bne Lml_n @ NAN * <anything> -> NAN
1: teq r5, ip
- bne LSYM(Lml_i)
+ bne Lml_i
orrs r6, yl, yh, lsl #12
do_it ne, t
movne xl, yl
movne xh, yh
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+ bne Lml_n @ <anything> * NAN -> NAN
+ SYM_END Lml_z
@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
eor xh, xh, yh
+ SYM_END Lml_i
@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
and xh, xh, #0x80000000
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f00000
mov xl, #0
RETLDM "r4, r5, r6"
+ SYM_END Lml_o
@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
+ SYM_END Lml_n
- FUNC_END aeabi_dmul
- FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
ARM_FUNC_ALIAS aeabi_ddiv divdf3
do_push {r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
subs ip, r4, #(254 - 1)
do_it hi
cmphi ip, #0x700
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
subs ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
orr xh, xh, #0x00100000
mov lr, #0
subs r4, r4, #1
- b LSYM(Lml_u)
+ b Lml_u
@ Result mightt need to be denormalized: put remainder bits
@ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
- b LSYM(Lml_u)
+ b Lml_u
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
teq r4, ip
do_it eq
teqeq r5, ip
- beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ beq Lml_n @ INF/NAN / INF/NAN -> NAN
teq r4, ip
bne 1f
orrs r4, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ bne Lml_n @ NAN / <anything> -> NAN
teq r5, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
+ bne Lml_i @ INF / <anything> -> INF
mov xl, yl
mov xh, yh
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+ b Lml_n @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
- beq LSYM(Lml_z) @ <anything> / INF -> 0
+ beq Lml_z @ <anything> / INF -> 0
mov xl, yl
mov xh, yh
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b Lml_n @ <anything> / NAN -> NAN
2: @ If both are nonzero, we need to normalize and resume above.
orrs r6, xl, xh, lsl #1
do_it ne
COND(orr,s,ne) r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
+ bne Lml_d
@ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bne Lml_i @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
+ bne Lml_z @ 0 / <non_zero> -> 0
+ b Lml_n @ 0 / 0 -> NAN
FUNC_END aeabi_ddiv
FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index c9bca4d..45bada4 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
#ifdef L_arm_muldivsf3
-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
ARM_FUNC_ALIAS aeabi_fmul mulsf3
@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
COND(and,s,ne) r3, ip, r1, lsr #23
teqne r2, ip
teqne r3, ip
- beq LSYM(Lml_s)
+ beq Lml_s
LSYM(Lml_x):
@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
@ Apply exponent bias, check for under/overflow.
sbc r2, r2, #127
cmp r2, #(254 - 1)
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
cmp r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
mov r3, #0
subs r2, r2, #1
-LSYM(Lml_u):
+ FUNC_END aeabi_fmul
+ FUNC_END mulsf3
+
+ARM_SYM_START Lml_u
@ Overflow?
- bgt LSYM(Lml_o)
+ bgt Lml_o
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
do_it eq
biceq r0, r0, ip, lsr #31
RET
+ SYM_END Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
teq r2, #0
and ip, r0, #0x80000000
1: do_it eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
beq 2b
orr r1, r1, ip
b LSYM(Lml_x)
+ SYM_END Lml_d
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
@ Isolate the INF and NAN cases away
and r3, ip, r1, lsr #23
teq r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
bics ip, r0, #0x80000000
do_it ne
COND(bic,s,ne) ip, r1, #0x80000000
- bne LSYM(Lml_d)
+ bne Lml_d
+ SYM_END Lml_s
@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
eor r0, r0, r1
bic r0, r0, #0x7fffffff
RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
moveq r0, r1
teqne r1, #0x0
teqne r1, #0x80000000
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ beq Lml_n @ 0 * INF or INF * 0 -> NAN
teq r2, ip
bne 1f
movs r2, r0, lsl #9
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+ bne Lml_n @ NAN * <anything> -> NAN
1: teq r3, ip
- bne LSYM(Lml_i)
+ bne Lml_i
movs r3, r1, lsl #9
do_it ne
movne r0, r1
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+ bne Lml_n @ <anything> * NAN -> NAN
+ SYM_END Lml_z
@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
eor r0, r0, r1
+ SYM_END Lml_i
@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
and r0, r0, #0x80000000
orr r0, r0, #0x7f000000
orr r0, r0, #0x00800000
RET
+ SYM_END Lml_o
@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
orr r0, r0, #0x7f000000
orr r0, r0, #0x00c00000
RET
+ SYM_END Lml_n
- FUNC_END aeabi_fmul
- FUNC_END mulsf3
-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
ARM_FUNC_ALIAS aeabi_fdiv divsf3
@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):
@ Check exponent for under/overflow.
cmp r2, #(254 - 1)
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
cmp r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
orr r0, r0, #0x00800000
mov r3, #0
subs r2, r2, #1
- b LSYM(Lml_u)
+ b Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
teq r2, ip
bne 1f
movs r2, r0, lsl #9
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ bne Lml_n @ NAN / <anything> -> NAN
teq r3, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
+ bne Lml_i @ INF / <anything> -> INF
mov r0, r1
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+ b Lml_n @ INF / (INF or NAN) -> NAN
1: teq r3, ip
bne 2f
movs r3, r1, lsl #9
- beq LSYM(Lml_z) @ <anything> / INF -> 0
+ beq Lml_z @ <anything> / INF -> 0
mov r0, r1
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b Lml_n @ <anything> / NAN -> NAN
2: @ If both are nonzero, we need to normalize and resume above.
bics ip, r0, #0x80000000
do_it ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
bne LSYM(Ldv_d)
@ One or both arguments are zero.
bics r2, r0, #0x80000000
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bne Lml_i @ <non_zero> / 0 -> INF
bics r3, r1, #0x80000000
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
+ bne Lml_z @ 0 / <non_zero> -> 0
+ b Lml_n @ 0 / 0 -> NAN
FUNC_END aeabi_fdiv
FUNC_END divsf3
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-09-26 2:11 Tony Wang
0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-09-26 2:11 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan
Ping?
> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Tuesday, September 16, 2014 11:01 AM
> To: 'gcc-patches@gcc.gnu.org'
> Cc: Richard Earnshaw; Ramana Radhakrishnan
> Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
>
> Ping?
>
> > -----Original Message-----
> > From: Tony Wang [mailto:tony.wang@arm.com]
> > Sent: Thursday, September 04, 2014 10:16 AM
> > To: 'gcc-patches@gcc.gnu.org'
> > Cc: Richard Earnshaw; Ramana Radhakrishnan
> > Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> >
> > Ping 2?
> >
> > > -----Original Message-----
> > > From: Tony Wang [mailto:tony.wang@arm.com]
> > > Sent: Thursday, August 28, 2014 2:02 PM
> > > To: 'gcc-patches@gcc.gnu.org'
> > > Cc: Richard Earnshaw; Ramana Radhakrishnan
> > > Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> > >
> > > Ping?
> > >
> > > > -----Original Message-----
> > > > From: Tony Wang [mailto:tony.wang@arm.com]
> > > > Sent: Thursday, August 21, 2014 2:15 PM
> > > > To: 'gcc-patches@gcc.gnu.org'
> > > > Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> > > >
> > > > Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> > > > generated when the two sections are too far away from each other. Also, I have verified, both manually
> > > > and using some test cases, that IP and PSR are not live at those points.
> > > >
> > > > gcc/libgcc/ChangeLog:
> > > > 2014-8-21 Tony Wang <tony.wang@arm.com>
> > > >
> > > > * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> > > > * config/arm/ieee754-df.S: Same with above
> > > >
> > > > BR,
> > > > Tony
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-09-16 3:00 Tony Wang
0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-09-16 3:00 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan
Ping?
> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Thursday, September 04, 2014 10:16 AM
> To: 'gcc-patches@gcc.gnu.org'
> Cc: Richard Earnshaw; Ramana Radhakrishnan
> Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
>
> Ping 2?
>
> > -----Original Message-----
> > From: Tony Wang [mailto:tony.wang@arm.com]
> > Sent: Thursday, August 28, 2014 2:02 PM
> > To: 'gcc-patches@gcc.gnu.org'
> > Cc: Richard Earnshaw; Ramana Radhakrishnan
> > Subject: RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> >
> > Ping?
> >
> > > -----Original Message-----
> > > From: Tony Wang [mailto:tony.wang@arm.com]
> > > Sent: Thursday, August 21, 2014 2:15 PM
> > > To: 'gcc-patches@gcc.gnu.org'
> > > Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
> > >
> > > Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> > > generated when the two sections are too far away from each other. Also, I have verified, both manually
> > > and using some test cases, that IP and PSR are not live at those points.
> > >
> > > gcc/libgcc/ChangeLog:
> > > 2014-8-21 Tony Wang <tony.wang@arm.com>
> > >
> > > * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> > > * config/arm/ieee754-df.S: Same with above
> > >
> > > BR,
> > > Tony
^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-08-28 6:02 Tony Wang
0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-08-28 6:02 UTC (permalink / raw)
To: gcc-patches; +Cc: Richard Earnshaw, Ramana Radhakrishnan
Ping?
> -----Original Message-----
> From: Tony Wang [mailto:tony.wang@arm.com]
> Sent: Thursday, August 21, 2014 2:15 PM
> To: 'gcc-patches@gcc.gnu.org'
> Subject: [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
>
> Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
> generated when the two sections are too far away from each other. Also, I have verified, both manually
> and using some test cases, that IP and PSR are not live at those points.
>
> gcc/libgcc/ChangeLog:
> 2014-8-21 Tony Wang <tony.wang@arm.com>
>
> * config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
> * config/arm/ieee754-df.S: Same with above
>
> BR,
> Tony
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
@ 2014-08-21 6:14 Tony Wang
0 siblings, 0 replies; 5+ messages in thread
From: Tony Wang @ 2014-08-21 6:14 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 503 bytes --]
Step 2: Mark all the symbols around the fragment boundaries as function symbols, so that veneers are
generated when the two sections are too far away from each other. Also, I have verified, both manually
and using some test cases, that IP and PSR are not live at those points.
gcc/libgcc/ChangeLog:
2014-8-21 Tony Wang <tony.wang@arm.com>
* config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
* config/arm/ieee754-df.S: Same with above
BR,
Tony
[-- Attachment #2: libgcc_mul_div_code_size_reduction_2.diff --]
[-- Type: application/octet-stream, Size: 9360 bytes --]
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index 406bb70..ecdd46f 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
#ifdef L_arm_muldivdf3
-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
ARM_FUNC_ALIAS aeabi_dmul muldf3
do_push {r4, r5, r6, lr}
@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
COND(and,s,ne) r5, ip, yh, lsr #20
teqne r4, ip
teqne r5, ip
- bleq LSYM(Lml_s)
+ bleq Lml_s
@ Add exponents together
add r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
subs ip, r4, #(254 - 1)
do_it hi
cmphi ip, #0x700
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
cmp lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
mov lr, #0
subs r4, r4, #1
-LSYM(Lml_u):
+ FUNC_END aeabi_dmul
+ FUNC_END muldf3
+
+ARM_SYM_START Lml_u
@ Overflow?
- bgt LSYM(Lml_o)
+ bgt Lml_o
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
do_it eq
biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
+ SYM_END Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
teq r4, #0
bne 2f
and r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
beq 3b
orr yh, yh, r6
RET
+ SYM_END Lml_d
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
@ Isolate the INF and NAN cases away
teq r4, ip
and r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
orrs r6, xl, xh, lsl #1
do_it ne
COND(orr,s,ne) r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
+ bne Lml_d
+ SYM_END Lml_s
@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
eor xh, xh, yh
and xh, xh, #0x80000000
mov xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
moveq xl, yl
moveq xh, yh
COND(orr,s,ne) r6, yl, yh, lsl #1
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ beq Lml_n @ 0 * INF or INF * 0 -> NAN
teq r4, ip
bne 1f
orrs r6, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+ bne Lml_n @ NAN * <anything> -> NAN
1: teq r5, ip
- bne LSYM(Lml_i)
+ bne Lml_i
orrs r6, yl, yh, lsl #12
do_it ne, t
movne xl, yl
movne xh, yh
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+ bne Lml_n @ <anything> * NAN -> NAN
+ SYM_END Lml_z
@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
eor xh, xh, yh
+ SYM_END Lml_i
@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
and xh, xh, #0x80000000
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f00000
mov xl, #0
RETLDM "r4, r5, r6"
+ SYM_END Lml_o
@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
+ SYM_END Lml_n
- FUNC_END aeabi_dmul
- FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
ARM_FUNC_ALIAS aeabi_ddiv divdf3
do_push {r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
subs ip, r4, #(254 - 1)
do_it hi
cmphi ip, #0x700
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
subs ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
orr xh, xh, #0x00100000
mov lr, #0
subs r4, r4, #1
- b LSYM(Lml_u)
+ b Lml_u
@ Result mightt need to be denormalized: put remainder bits
@ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
- b LSYM(Lml_u)
+ b Lml_u
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
teq r4, ip
do_it eq
teqeq r5, ip
- beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ beq Lml_n @ INF/NAN / INF/NAN -> NAN
teq r4, ip
bne 1f
orrs r4, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ bne Lml_n @ NAN / <anything> -> NAN
teq r5, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
+ bne Lml_i @ INF / <anything> -> INF
mov xl, yl
mov xh, yh
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+ b Lml_n @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
- beq LSYM(Lml_z) @ <anything> / INF -> 0
+ beq Lml_z @ <anything> / INF -> 0
mov xl, yl
mov xh, yh
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b Lml_n @ <anything> / NAN -> NAN
2: @ If both are nonzero, we need to normalize and resume above.
orrs r6, xl, xh, lsl #1
do_it ne
COND(orr,s,ne) r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
+ bne Lml_d
@ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bne Lml_i @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
+ bne Lml_z @ 0 / <non_zero> -> 0
+ b Lml_n @ 0 / 0 -> NAN
FUNC_END aeabi_ddiv
FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index c9bca4d..45bada4 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
#ifdef L_arm_muldivsf3
-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
ARM_FUNC_ALIAS aeabi_fmul mulsf3
@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
COND(and,s,ne) r3, ip, r1, lsr #23
teqne r2, ip
teqne r3, ip
- beq LSYM(Lml_s)
+ beq Lml_s
LSYM(Lml_x):
@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
@ Apply exponent bias, check for under/overflow.
sbc r2, r2, #127
cmp r2, #(254 - 1)
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
cmp r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
mov r3, #0
subs r2, r2, #1
-LSYM(Lml_u):
+ FUNC_END aeabi_fmul
+ FUNC_END mulsf3
+
+ARM_SYM_START Lml_u
@ Overflow?
- bgt LSYM(Lml_o)
+ bgt Lml_o
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
do_it eq
biceq r0, r0, ip, lsr #31
RET
+ SYM_END Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START Lml_d
teq r2, #0
and ip, r0, #0x80000000
1: do_it eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
beq 2b
orr r1, r1, ip
b LSYM(Lml_x)
+ SYM_END Lml_d
-LSYM(Lml_s):
+ARM_SYM_START Lml_s
@ Isolate the INF and NAN cases away
and r3, ip, r1, lsr #23
teq r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
bics ip, r0, #0x80000000
do_it ne
COND(bic,s,ne) ip, r1, #0x80000000
- bne LSYM(Lml_d)
+ bne Lml_d
+ SYM_END Lml_s
@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START Lml_z
eor r0, r0, r1
bic r0, r0, #0x7fffffff
RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
moveq r0, r1
teqne r1, #0x0
teqne r1, #0x80000000
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ beq Lml_n @ 0 * INF or INF * 0 -> NAN
teq r2, ip
bne 1f
movs r2, r0, lsl #9
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+ bne Lml_n @ NAN * <anything> -> NAN
1: teq r3, ip
- bne LSYM(Lml_i)
+ bne Lml_i
movs r3, r1, lsl #9
do_it ne
movne r0, r1
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+ bne Lml_n @ <anything> * NAN -> NAN
+ SYM_END Lml_z
@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START Lml_i
eor r0, r0, r1
+ SYM_END Lml_i
@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START Lml_o
and r0, r0, #0x80000000
orr r0, r0, #0x7f000000
orr r0, r0, #0x00800000
RET
+ SYM_END Lml_o
@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START Lml_n
orr r0, r0, #0x7f000000
orr r0, r0, #0x00c00000
RET
+ SYM_END Lml_n
- FUNC_END aeabi_fmul
- FUNC_END mulsf3
-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
ARM_FUNC_ALIAS aeabi_fdiv divsf3
@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):
@ Check exponent for under/overflow.
cmp r2, #(254 - 1)
- bhi LSYM(Lml_u)
+ bhi Lml_u
@ Round the result, merge final exponent.
cmp r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
orr r0, r0, #0x00800000
mov r3, #0
subs r2, r2, #1
- b LSYM(Lml_u)
+ b Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
teq r2, ip
bne 1f
movs r2, r0, lsl #9
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ bne Lml_n @ NAN / <anything> -> NAN
teq r3, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
+ bne Lml_i @ INF / <anything> -> INF
mov r0, r1
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+ b Lml_n @ INF / (INF or NAN) -> NAN
1: teq r3, ip
bne 2f
movs r3, r1, lsl #9
- beq LSYM(Lml_z) @ <anything> / INF -> 0
+ beq Lml_z @ <anything> / INF -> 0
mov r0, r1
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b Lml_n @ <anything> / NAN -> NAN
2: @ If both are nonzero, we need to normalize and resume above.
bics ip, r0, #0x80000000
do_it ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
bne LSYM(Ldv_d)
@ One or both arguments are zero.
bics r2, r0, #0x80000000
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bne Lml_i @ <non_zero> / 0 -> INF
bics r3, r1, #0x80000000
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
+ bne Lml_z @ 0 / <non_zero> -> 0
+ b Lml_n @ 0 / 0 -> NAN
FUNC_END aeabi_fdiv
FUNC_END divsf3
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-09-26 2:11 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-04 2:16 [PATCH 2/3,ARM,libgcc]Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc Tony Wang
-- strict thread matches above, loose matches on Subject: below --
2014-09-26 2:11 Tony Wang
2014-09-16 3:00 Tony Wang
2014-08-28 6:02 Tony Wang
2014-08-21 6:14 Tony Wang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).