From: "Thomas Preud'homme"
To: "Thomas Preud'homme", "Richard Earnshaw", "Marcus Shawcroft", "Ramana Radhakrishnan"
References: <000101d02ff1$1c67c190$553744b0$@arm.com> <54B681DC.3070004@arm.com> <001201d039f1$3b73c9d0$b25b5d70$@arm.com>
Subject: RE: [PATCH 2/3, ARM, libgcc, ping6] Code size optimization for the fmul/fdiv and dmul/ddiv function in libgcc
Date: Thu, 30 Apr 2015 07:43:00 -0000
Message-ID: <000d01d08315$eb8e5570$c2ab0050$@arm.com>
X-SW-Source: 2015-04/txt/msg01973.txt.bz2

Here is an updated patch that prefixes the local symbols with __ for extra
safety. They appear in the symtab as local symbols, so this is not strictly
necessary, but one is never too cautious. Being local, they also do not
generate any PLT entry. They only appear because the jumps are from one
section to another (which is the whole purpose of this patch) and thus need
a static relocation.

I hope this revised version addresses all your concerns.

ChangeLog entry is unchanged:

*** gcc/libgcc/ChangeLog ***

2015-04-30  Tony Wang

	* config/arm/ieee754-sf.S: Expose symbols around fragment
	boundaries as function symbols.
	* config/arm/ieee754-df.S: Likewise.
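As a side note, here is a rough sketch of how the ARM_SYM_START/SYM_END
markers used in the patch below can be pictured. The real macro definitions
come from an earlier patch in this series and are not part of this diff, so
treat this purely as an illustration; the .text.caller section name and the
stand-alone caller are made up for the example:

@ Sketch only: assumed shape of the helper macros, not the real definitions.
	.macro ARM_SYM_START name
	.type	\name, %function
\name:
	.endm

	.macro SYM_END name
	.size	\name, . - \name
	.endm

@ Hypothetical caller living in another (per-function) section.
	.section .text.caller, "ax", %progbits
	b	__Lml_u		@ cross-section jump, needs a static relocation

Because __Lml_u keeps local binding, a branch like the one above is fixed up
with a static relocation (R_ARM_JUMP24 or R_ARM_THM_JUMP24, depending on the
instruction set) when the final link is done, and no dynamic symbol or PLT
entry is ever created.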
diff --git a/libgcc/config/arm/ieee754-df.S b/libgcc/config/arm/ieee754-df.S
index c1468dc..39b0028 100644
--- a/libgcc/config/arm/ieee754-df.S
+++ b/libgcc/config/arm/ieee754-df.S
@@ -559,7 +559,7 @@ ARM_FUNC_ALIAS aeabi_l2d floatdidf

 #ifdef L_arm_muldivdf3

-ARM_FUNC_START muldf3
+ARM_FUNC_START muldf3, function_section
 ARM_FUNC_ALIAS aeabi_dmul muldf3
 	do_push	{r4, r5, r6, lr}

@@ -571,7 +571,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	COND(and,s,ne)	r5, ip, yh, lsr #20
 	teqne	r4, ip
 	teqne	r5, ip
-	bleq	LSYM(Lml_s)
+	bleq	__Lml_s

 	@ Add exponents together
 	add	r4, r4, r5
@@ -689,7 +689,7 @@ ARM_FUNC_ALIAS aeabi_dmul muldf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u

 	@ Round the result, merge final exponent.
 	cmp	lr, #0x80000000
@@ -716,9 +716,12 @@ LSYM(Lml_1):
 	mov	lr, #0
 	subs	r4, r4, #1

-LSYM(Lml_u):
+	FUNC_END aeabi_dmul
+	FUNC_END muldf3
+
+ARM_SYM_START __Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	__Lml_o

 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r4, #(53 + 1)
@@ -778,10 +781,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	xl, xl, r3, lsr #31
 	RETLDM	"r4, r5, r6"
+	SYM_END __Lml_u

 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START __Lml_d
 	teq	r4, #0
 	bne	2f
 	and	r6, xh, #0x80000000
@@ -804,8 +808,9 @@ LSYM(Lml_d):
 	beq	3b
 	orr	yh, yh, r6
 	RET
+	SYM_END __Lml_d

-LSYM(Lml_s):
+ARM_SYM_START __Lml_s
 	@ Isolate the INF and NAN cases away
 	teq	r4, ip
 	and	r5, ip, yh, lsr #20
@@ -817,10 +822,11 @@ LSYM(Lml_s):
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	__Lml_d
+	SYM_END __Lml_s

 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START __Lml_z
 	eor	xh, xh, yh
 	and	xh, xh, #0x80000000
 	mov	xl, #0
@@ -832,41 +838,42 @@ LSYM(Lml_z):
 	moveq	xl, yl
 	moveq	xh, yh
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	__Lml_n			@ 0 * INF or INF * 0 -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r6, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	__Lml_n			@ NAN * <anything> -> NAN
 1:	teq	r5, ip
-	bne	LSYM(Lml_i)
+	bne	__Lml_i
 	orrs	r6, yl, yh, lsl #12
 	do_it	ne, t
 	movne	xl, yl
 	movne	xh, yh
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	__Lml_n			@ <anything> * NAN -> NAN
+	SYM_END __Lml_z

 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START __Lml_i
 	eor	xh, xh, yh
+	SYM_END __Lml_i

 	@ Overflow: return INF (sign already in xh).
-LSYM(Lml_o):
+ARM_SYM_START __Lml_o
 	and	xh, xh, #0x80000000
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f00000
 	mov	xl, #0
 	RETLDM	"r4, r5, r6"
+	SYM_END __Lml_o

 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START __Lml_n
 	orr	xh, xh, #0x7f000000
 	orr	xh, xh, #0x00f80000
 	RETLDM	"r4, r5, r6"
+	SYM_END __Lml_n

-	FUNC_END aeabi_dmul
-	FUNC_END muldf3
-
-ARM_FUNC_START divdf3
+ARM_FUNC_START divdf3 function_section
 ARM_FUNC_ALIAS aeabi_ddiv divdf3

 	do_push	{r4, r5, r6, lr}
@@ -985,7 +992,7 @@ ARM_FUNC_ALIAS aeabi_ddiv divdf3
 	subs	ip, r4, #(254 - 1)
 	do_it	hi
 	cmphi	ip, #0x700
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u

 	@ Round the result, merge final exponent.
 	subs	ip, r5, yh
@@ -1009,13 +1016,13 @@ LSYM(Ldv_1):
 	orr	xh, xh, #0x00100000
 	mov	lr, #0
 	subs	r4, r4, #1
-	b	LSYM(Lml_u)
+	b	__Lml_u

 	@ Result mightt need to be denormalized: put remainder bits
 	@ in lr for rounding considerations.
 LSYM(Ldv_u):
 	orr	lr, r5, r6
-	b	LSYM(Lml_u)
+	b	__Lml_u

 	@ One or both arguments is either INF, NAN or zero.
 LSYM(Ldv_s):
@@ -1023,34 +1030,34 @@ LSYM(Ldv_s):
 	teq	r4, ip
 	do_it	eq
 	teqeq	r5, ip
-	beq	LSYM(Lml_n)		@ INF/NAN / INF/NAN -> NAN
+	beq	__Lml_n			@ INF/NAN / INF/NAN -> NAN
 	teq	r4, ip
 	bne	1f
 	orrs	r4, xl, xh, lsl #12
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	__Lml_n			@ NAN / <anything> -> NAN
 	teq	r5, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	__Lml_i			@ INF / <anything> -> INF
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	__Lml_n			@ INF / (INF or NAN) -> NAN
 1:	teq	r5, ip
 	bne	2f
 	orrs	r5, yl, yh, lsl #12
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	__Lml_z			@ <anything> / INF -> 0
 	mov	xl, yl
 	mov	xh, yh
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	__Lml_n			@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	orrs	r6, xl, xh, lsl #1
 	do_it	ne
 	COND(orr,s,ne)	r6, yl, yh, lsl #1
-	bne	LSYM(Lml_d)
+	bne	__Lml_d
 	@ One or both arguments are 0.
 	orrs	r4, xl, xh, lsl #1
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	__Lml_i			@ <non_zero> / 0 -> INF
 	orrs	r5, yl, yh, lsl #1
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	__Lml_z			@ 0 / <non_zero> -> 0
+	b	__Lml_n			@ 0 / 0 -> NAN

 	FUNC_END aeabi_ddiv
 	FUNC_END divdf3
diff --git a/libgcc/config/arm/ieee754-sf.S b/libgcc/config/arm/ieee754-sf.S
index bc44d4e..7c2ab8b 100644
--- a/libgcc/config/arm/ieee754-sf.S
+++ b/libgcc/config/arm/ieee754-sf.S
@@ -418,7 +418,7 @@ ARM_FUNC_ALIAS aeabi_l2f floatdisf

 #ifdef L_arm_muldivsf3

-ARM_FUNC_START mulsf3
+ARM_FUNC_START mulsf3, function_section
 ARM_FUNC_ALIAS aeabi_fmul mulsf3

 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -428,7 +428,7 @@ ARM_FUNC_ALIAS aeabi_fmul mulsf3
 	COND(and,s,ne)	r3, ip, r1, lsr #23
 	teqne	r2, ip
 	teqne	r3, ip
-	beq	LSYM(Lml_s)
+	beq	__Lml_s
 LSYM(Lml_x):

 	@ Add exponents together
@@ -490,7 +490,7 @@ LSYM(Lml_x):
 	@ Apply exponent bias, check for under/overflow.
 	sbc	r2, r2, #127
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u

 	@ Round the result, merge final exponent.
 	cmp	r3, #0x80000000
@@ -518,9 +518,12 @@ LSYM(Lml_1):
 	mov	r3, #0
 	subs	r2, r2, #1

-LSYM(Lml_u):
+	FUNC_END aeabi_fmul
+	FUNC_END mulsf3
+
+ARM_SYM_START __Lml_u
 	@ Overflow?
-	bgt	LSYM(Lml_o)
+	bgt	__Lml_o

 	@ Check if denormalized result is possible, otherwise return signed 0.
 	cmn	r2, #(24 + 1)
@@ -540,10 +543,11 @@ LSYM(Lml_u):
 	do_it	eq
 	biceq	r0, r0, ip, lsr #31
 	RET
+	SYM_END __Lml_u

 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
-LSYM(Lml_d):
+ARM_SYM_START __Lml_d
 	teq	r2, #0
 	and	ip, r0, #0x80000000
 1:	do_it	eq, tt
@@ -561,8 +565,9 @@ LSYM(Lml_d):
 	beq	2b
 	orr	r1, r1, ip
 	b	LSYM(Lml_x)
+	SYM_END __Lml_d

-LSYM(Lml_s):
+ARM_SYM_START __Lml_s
 	@ Isolate the INF and NAN cases away
 	and	r3, ip, r1, lsr #23
 	teq	r2, ip
@@ -574,10 +579,11 @@ LSYM(Lml_s):
 	bics	ip, r0, #0x80000000
 	do_it	ne
 	COND(bic,s,ne)	ip, r1, #0x80000000
-	bne	LSYM(Lml_d)
+	bne	__Lml_d
+	SYM_END __Lml_s

 	@ Result is 0, but determine sign anyway.
-LSYM(Lml_z):
+ARM_SYM_START __Lml_z
 	eor	r0, r0, r1
 	bic	r0, r0, #0x7fffffff
 	RET
@@ -589,39 +595,41 @@ LSYM(Lml_z):
 	moveq	r0, r1
 	teqne	r1, #0x0
 	teqne	r1, #0x80000000
-	beq	LSYM(Lml_n)		@ 0 * INF or INF * 0 -> NAN
+	beq	__Lml_n			@ 0 * INF or INF * 0 -> NAN
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN * <anything> -> NAN
+	bne	__Lml_n			@ NAN * <anything> -> NAN
 1:	teq	r3, ip
-	bne	LSYM(Lml_i)
+	bne	__Lml_i
 	movs	r3, r1, lsl #9
 	do_it	ne
 	movne	r0, r1
-	bne	LSYM(Lml_n)		@ <anything> * NAN -> NAN
+	bne	__Lml_n			@ <anything> * NAN -> NAN
+	SYM_END __Lml_z

 	@ Result is INF, but we need to determine its sign.
-LSYM(Lml_i):
+ARM_SYM_START __Lml_i
 	eor	r0, r0, r1
+	SYM_END __Lml_i

 	@ Overflow: return INF (sign already in r0).
-LSYM(Lml_o):
+ARM_SYM_START __Lml_o
 	and	r0, r0, #0x80000000
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00800000
 	RET
+	SYM_END __Lml_o

 	@ Return a quiet NAN.
-LSYM(Lml_n):
+ARM_SYM_START __Lml_n
 	orr	r0, r0, #0x7f000000
 	orr	r0, r0, #0x00c00000
 	RET
+	SYM_END __Lml_n

-	FUNC_END aeabi_fmul
-	FUNC_END mulsf3

-ARM_FUNC_START divsf3
+ARM_FUNC_START divsf3 function_section
 ARM_FUNC_ALIAS aeabi_fdiv divsf3

 	@ Mask out exponents, trap any zero/denormal/INF/NAN.
@@ -684,7 +692,7 @@ LSYM(Ldv_x):

 	@ Check exponent for under/overflow.
 	cmp	r2, #(254 - 1)
-	bhi	LSYM(Lml_u)
+	bhi	__Lml_u

 	@ Round the result, merge final exponent.
 	cmp	r3, r1
@@ -706,7 +714,7 @@ LSYM(Ldv_1):
 	orr	r0, r0, #0x00800000
 	mov	r3, #0
 	subs	r2, r2, #1
-	b	LSYM(Lml_u)
+	b	__Lml_u

 	@ One or both arguments are denormalized.
 	@ Scale them leftwards and preserve sign bit.
@@ -735,17 +743,17 @@ LSYM(Ldv_s):
 	teq	r2, ip
 	bne	1f
 	movs	r2, r0, lsl #9
-	bne	LSYM(Lml_n)		@ NAN / <anything> -> NAN
+	bne	__Lml_n			@ NAN / <anything> -> NAN
 	teq	r3, ip
-	bne	LSYM(Lml_i)		@ INF / <anything> -> INF
+	bne	__Lml_i			@ INF / <anything> -> INF
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ INF / (INF or NAN) -> NAN
+	b	__Lml_n			@ INF / (INF or NAN) -> NAN
 1:	teq	r3, ip
 	bne	2f
 	movs	r3, r1, lsl #9
-	beq	LSYM(Lml_z)		@ <anything> / INF -> 0
+	beq	__Lml_z			@ <anything> / INF -> 0
 	mov	r0, r1
-	b	LSYM(Lml_n)		@ <anything> / NAN -> NAN
+	b	__Lml_n			@ <anything> / NAN -> NAN
 2:	@ If both are nonzero, we need to normalize and resume above.
 	bics	ip, r0, #0x80000000
 	do_it	ne
@@ -753,10 +761,10 @@ LSYM(Ldv_s):
 	bne	LSYM(Ldv_d)
 	@ One or both arguments are zero.
 	bics	r2, r0, #0x80000000
-	bne	LSYM(Lml_i)		@ <non_zero> / 0 -> INF
+	bne	__Lml_i			@ <non_zero> / 0 -> INF
 	bics	r3, r1, #0x80000000
-	bne	LSYM(Lml_z)		@ 0 / <non_zero> -> 0
-	b	LSYM(Lml_n)		@ 0 / 0 -> NAN
+	bne	__Lml_z			@ 0 / <non_zero> -> 0
+	b	__Lml_n			@ 0 / 0 -> NAN

 	FUNC_END aeabi_fdiv
 	FUNC_END divsf3

Is this ok for trunk?

Best regards,

Thomas