* RE: [PATCH 2/3, ARM, libgcc, ping6] Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
2015-01-27 8:11 ` Thomas Preud'homme
2015-01-27 8:33 ` Thomas Preud'homme
@ 2015-04-30 7:43 ` Thomas Preud'homme
1 sibling, 0 replies; 5+ messages in thread
From: Thomas Preud'homme @ 2015-04-30 7:43 UTC (permalink / raw)
To: Thomas Preud'homme, Richard Earnshaw, gcc-patches,
Marcus Shawcroft, Ramana Radhakrishnan
Here is an updated patch that prefixes local symbols with __ for more safety.
They appear in the symtab as local so it is not strictly necessary but one is
never too cautious. Being local, they also do not generate any PLT entry.
They appear only because the jumps are from one section to another
(which is the whole purpose of this patch) and thus need a static relocation.
I hope this revised version addresses all your concerns.
ChangeLog entry is unchanged:
*** gcc/libgcc/ChangeLog ***
2015-04-30 Tony Wang <tony.wang@arm.com>
* config/arm/ieee754-sf.S: Expose symbols around fragment boundaries as function symbols.
* config/arm/ieee754-df.S: Likewise.
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index c1468dc..39b0028 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf
#ifdef L_arm_muldivdf3
-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
ARM_FUNC_ALIAS aeabi_dmul muldf3
do_push {r4, r5, r6, lr}
@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
COND(and,s,ne) r5, ip, yh, lsr #20
teqne r4, ip
teqne r5, ip
- bleq LSYM(Lml_s)
+ bleq __Lml_s
@ Add exponents together
add r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
subs ip, r4, #(254 - 1)
do_it hi
cmphi ip, #0x700
- bhi LSYM(Lml_u)
+ bhi __Lml_u
@ Round the result, merge final exponent.
cmp lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
mov lr, #0
subs r4, r4, #1
-LSYM(Lml_u):
+ FUNC_END aeabi_dmul
+ FUNC_END muldf3
+
+ARM_SYM_START __Lml_u
@ Overflow?
- bgt LSYM(Lml_o)
+ bgt __Lml_o
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
do_it eq
biceq xl, xl, r3, lsr #31
RETLDM "r4, r5, r6"
+ SYM_END __Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START __Lml_d
teq r4, #0
bne 2f
and r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
beq 3b
orr yh, yh, r6
RET
+ SYM_END __Lml_d
-LSYM(Lml_s):
+ARM_SYM_START __Lml_s
@ Isolate the INF and NAN cases away
teq r4, ip
and r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
orrs r6, xl, xh, lsl #1
do_it ne
COND(orr,s,ne) r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
+ bne __Lml_d
+ SYM_END __Lml_s
@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START __Lml_z
eor xh, xh, yh
and xh, xh, #0x80000000
mov xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
moveq xl, yl
moveq xh, yh
COND(orr,s,ne) r6, yl, yh, lsl #1
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ beq __Lml_n @ 0 * INF or INF * 0 -> NAN
teq r4, ip
bne 1f
orrs r6, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+ bne __Lml_n @ NAN * <anything> -> NAN
1: teq r5, ip
- bne LSYM(Lml_i)
+ bne __Lml_i
orrs r6, yl, yh, lsl #12
do_it ne, t
movne xl, yl
movne xh, yh
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+ bne __Lml_n @ <anything> * NAN -> NAN
+ SYM_END __Lml_z
@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START __Lml_i
eor xh, xh, yh
+ SYM_END __Lml_i
@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START __Lml_o
and xh, xh, #0x80000000
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f00000
mov xl, #0
RETLDM "r4, r5, r6"
+ SYM_END __Lml_o
@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START __Lml_n
orr xh, xh, #0x7f000000
orr xh, xh, #0x00f80000
RETLDM "r4, r5, r6"
+ SYM_END __Lml_n
- FUNC_END aeabi_dmul
- FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
ARM_FUNC_ALIAS aeabi_ddiv divdf3
do_push {r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
subs ip, r4, #(254 - 1)
do_it hi
cmphi ip, #0x700
- bhi LSYM(Lml_u)
+ bhi __Lml_u
@ Round the result, merge final exponent.
subs ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
orr xh, xh, #0x00100000
mov lr, #0
subs r4, r4, #1
- b LSYM(Lml_u)
+ b __Lml_u
@ Result mightt need to be denormalized: put remainder bits
@ in lr for rounding considerations.
LSYM(Ldv_u):
orr lr, r5, r6
- b LSYM(Lml_u)
+ b __Lml_u
@ One or both arguments is either INF, NAN or zero.
LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
teq r4, ip
do_it eq
teqeq r5, ip
- beq LSYM(Lml_n) @ INF/NAN / INF/NAN -> NAN
+ beq __Lml_n @ INF/NAN / INF/NAN -> NAN
teq r4, ip
bne 1f
orrs r4, xl, xh, lsl #12
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ bne __Lml_n @ NAN / <anything> -> NAN
teq r5, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
+ bne __Lml_i @ INF / <anything> -> INF
mov xl, yl
mov xh, yh
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+ b __Lml_n @ INF / (INF or NAN) -> NAN
1: teq r5, ip
bne 2f
orrs r5, yl, yh, lsl #12
- beq LSYM(Lml_z) @ <anything> / INF -> 0
+ beq __Lml_z @ <anything> / INF -> 0
mov xl, yl
mov xh, yh
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b __Lml_n @ <anything> / NAN -> NAN
2: @ If both are nonzero, we need to normalize and resume above.
orrs r6, xl, xh, lsl #1
do_it ne
COND(orr,s,ne) r6, yl, yh, lsl #1
- bne LSYM(Lml_d)
+ bne __Lml_d
@ One or both arguments are 0.
orrs r4, xl, xh, lsl #1
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bne __Lml_i @ <non_zero> / 0 -> INF
orrs r5, yl, yh, lsl #1
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
+ bne __Lml_z @ 0 / <non_zero> -> 0
+ b __Lml_n @ 0 / 0 -> NAN
FUNC_END aeabi_ddiv
FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index bc44d4e..7c2ab8b 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf
#ifdef L_arm_muldivsf3
-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
ARM_FUNC_ALIAS aeabi_fmul mulsf3
@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
COND(and,s,ne) r3, ip, r1, lsr #23
teqne r2, ip
teqne r3, ip
- beq LSYM(Lml_s)
+ beq __Lml_s
LSYM(Lml_x):
@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
@ Apply exponent bias, check for under/overflow.
sbc r2, r2, #127
cmp r2, #(254 - 1)
- bhi LSYM(Lml_u)
+ bhi __Lml_u
@ Round the result, merge final exponent.
cmp r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
mov r3, #0
subs r2, r2, #1
-LSYM(Lml_u):
+ FUNC_END aeabi_fmul
+ FUNC_END mulsf3
+
+ARM_SYM_START __Lml_u
@ Overflow?
- bgt LSYM(Lml_o)
+ bgt __Lml_o
@ Check if denormalized result is possible, otherwise return signed 0.
cmn r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
do_it eq
biceq r0, r0, ip, lsr #31
RET
+ SYM_END __Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START __Lml_d
teq r2, #0
and ip, r0, #0x80000000
1: do_it eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
beq 2b
orr r1, r1, ip
b LSYM(Lml_x)
+ SYM_END __Lml_d
-LSYM(Lml_s):
+ARM_SYM_START __Lml_s
@ Isolate the INF and NAN cases away
and r3, ip, r1, lsr #23
teq r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
bics ip, r0, #0x80000000
do_it ne
COND(bic,s,ne) ip, r1, #0x80000000
- bne LSYM(Lml_d)
+ bne __Lml_d
+ SYM_END __Lml_s
@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START __Lml_z
eor r0, r0, r1
bic r0, r0, #0x7fffffff
RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
moveq r0, r1
teqne r1, #0x0
teqne r1, #0x80000000
- beq LSYM(Lml_n) @ 0 * INF or INF * 0 -> NAN
+ beq __Lml_n @ 0 * INF or INF * 0 -> NAN
teq r2, ip
bne 1f
movs r2, r0, lsl #9
- bne LSYM(Lml_n) @ NAN * <anything> -> NAN
+ bne __Lml_n @ NAN * <anything> -> NAN
1: teq r3, ip
- bne LSYM(Lml_i)
+ bne __Lml_i
movs r3, r1, lsl #9
do_it ne
movne r0, r1
- bne LSYM(Lml_n) @ <anything> * NAN -> NAN
+ bne __Lml_n @ <anything> * NAN -> NAN
+ SYM_END __Lml_z
@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START __Lml_i
eor r0, r0, r1
+ SYM_END __Lml_i
@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START __Lml_o
and r0, r0, #0x80000000
orr r0, r0, #0x7f000000
orr r0, r0, #0x00800000
RET
+ SYM_END __Lml_o
@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START __Lml_n
orr r0, r0, #0x7f000000
orr r0, r0, #0x00c00000
RET
+ SYM_END __Lml_n
- FUNC_END aeabi_fmul
- FUNC_END mulsf3
-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
ARM_FUNC_ALIAS aeabi_fdiv divsf3
@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):
@ Check exponent for under/overflow.
cmp r2, #(254 - 1)
- bhi LSYM(Lml_u)
+ bhi __Lml_u
@ Round the result, merge final exponent.
cmp r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
orr r0, r0, #0x00800000
mov r3, #0
subs r2, r2, #1
- b LSYM(Lml_u)
+ b __Lml_u
@ One or both arguments are denormalized.
@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
teq r2, ip
bne 1f
movs r2, r0, lsl #9
- bne LSYM(Lml_n) @ NAN / <anything> -> NAN
+ bne __Lml_n @ NAN / <anything> -> NAN
teq r3, ip
- bne LSYM(Lml_i) @ INF / <anything> -> INF
+ bne __Lml_i @ INF / <anything> -> INF
mov r0, r1
- b LSYM(Lml_n) @ INF / (INF or NAN) -> NAN
+ b __Lml_n @ INF / (INF or NAN) -> NAN
1: teq r3, ip
bne 2f
movs r3, r1, lsl #9
- beq LSYM(Lml_z) @ <anything> / INF -> 0
+ beq __Lml_z @ <anything> / INF -> 0
mov r0, r1
- b LSYM(Lml_n) @ <anything> / NAN -> NAN
+ b __Lml_n @ <anything> / NAN -> NAN
2: @ If both are nonzero, we need to normalize and resume above.
bics ip, r0, #0x80000000
do_it ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
bne LSYM(Ldv_d)
@ One or both arguments are zero.
bics r2, r0, #0x80000000
- bne LSYM(Lml_i) @ <non_zero> / 0 -> INF
+ bne __Lml_i @ <non_zero> / 0 -> INF
bics r3, r1, #0x80000000
- bne LSYM(Lml_z) @ 0 / <non_zero> -> 0
- b LSYM(Lml_n) @ 0 / 0 -> NAN
+ bne __Lml_z @ 0 / <non_zero> -> 0
+ b __Lml_n @ 0 / 0 -> NAN
FUNC_END aeabi_fdiv
FUNC_END divsf3
Is this ok for trunk?
Best regards,
Thomas
^ permalink raw reply [flat|nested] 5+ messages in thread