From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by sourceware.org (Postfix) with ESMTP id 54F053858C30 for ; Tue, 22 Nov 2022 15:04:09 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 54F053858C30 Authentication-Results: sourceware.org; dmarc=none (p=none dis=none) header.from=foss.arm.com Authentication-Results: sourceware.org; spf=none smtp.mailfrom=foss.arm.com Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 96A391FB; Tue, 22 Nov 2022 07:04:15 -0800 (PST) Received: from [10.2.78.76] (unknown [10.2.78.76]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPSA id 8ED8A3F73D; Tue, 22 Nov 2022 07:04:08 -0800 (PST) Message-ID: Date: Tue, 22 Nov 2022 15:04:07 +0000 MIME-Version: 1.0 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.4.2 Subject: Re: [PATCH v4 2/8] newlib: libc: strcmp M-profile PACBTI-enablement Content-Language: en-GB To: "Victor L. Do Nascimento" , newlib@sourceware.org Cc: Richard.Earnshaw@arm.com References: From: Richard Earnshaw In-Reply-To: Content-Type: text/plain; charset=UTF-8; format=flowed Content-Transfer-Encoding: 7bit X-Spam-Status: No, score=-3495.8 required=5.0 tests=BAYES_00,GIT_PATCH_0,KAM_DMARC_STATUS,KAM_LAZY_DOMAIN_SECURITY,NICE_REPLY_A,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: On 26/10/2022 12:46, Victor L. Do Nascimento wrote: > Add function prologue/epilogue to conditionally add BTI landing pads > and/or PAC code generation & authentication instructions depending on > compilation flags. > > This patch enables PACBTI for all relevant variants of strcmp: > * Newlib for armv8.1-m.main+pacbti > * Newlib for armv8.1-m.main+pacbti+mve > * Newlib-nano > --- > newlib/libc/machine/arm/strcmp-arm-tiny.S | 8 ++++- > newlib/libc/machine/arm/strcmp-armv7.S | 44 +++++++++++++++-------- > newlib/libc/machine/arm/strcmp-armv7m.S | 26 +++++++++----- > 3 files changed, 54 insertions(+), 24 deletions(-) > > diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S b/newlib/libc/machine/arm/strcmp-arm-tiny.S > index 607a41daf..0bd2a2e6e 100644 > --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S > +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S > @@ -29,10 +29,14 @@ > /* Tiny version of strcmp in ARM state. Used only when optimizing > for size. Also supports Thumb-2. */ > > +#include "arm_asm.h" > + > .syntax unified > def_fn strcmp > + .fnstart > .cfi_sections .debug_frame > .cfi_startproc > + prologue > 1: > ldrb r2, [r0], #1 > ldrb r3, [r1], #1 > @@ -42,6 +46,8 @@ def_fn strcmp > beq 1b > 2: > subs r0, r2, r3 > - bx lr > + epilogue > .cfi_endproc > + .cantunwind > + .fnend > .size strcmp, . - strcmp > diff --git a/newlib/libc/machine/arm/strcmp-armv7.S b/newlib/libc/machine/arm/strcmp-armv7.S > index 2f93bfb73..26ba579ae 100644 > --- a/newlib/libc/machine/arm/strcmp-armv7.S > +++ b/newlib/libc/machine/arm/strcmp-armv7.S > @@ -45,6 +45,8 @@ > .thumb > .syntax unified > > +#include "arm_asm.h" > + > /* Parameters and result. */ > #define src1 r0 > #define src2 r1 > @@ -91,8 +93,9 @@ > ldrd r4, r5, [sp], #16 > .cfi_restore 4 > .cfi_restore 5 > + .cfi_adjust_cfa_offset -16 > sub result, result, r1, lsr #24 > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > #else > /* To use the big-endian trick we'd have to reverse all three words. > that's slower than this approach. */ > @@ -112,22 +115,30 @@ > ldrd r4, r5, [sp], #16 > .cfi_restore 4 > .cfi_restore 5 > + .cfi_adjust_cfa_offset -16 > sub result, result, r1 > > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > #endif > .endm > > + > .text > .p2align 5 > + .fnstart > + .cfi_sections .debug_frame > + .cfi_startproc > .Lstrcmp_start_addr: > #ifndef STRCMP_NO_PRECHECK > .Lfastpath_exit: > + .cfi_remember_state > sub r0, r2, r3 > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > nop > #endif > def_fn strcmp > + .cfi_restore_state I guess this is supposed to match the remember state above, but that's inside an ifdef and this isn't, so they are unbalanced. I think it would be better to move the fastpath_exit code further down, perhaps below the first epilogue sequence. Then we can clean up the entry part and make it more natural (in particular we can simplify the .size directive calculation). The rest of this patch is OK. R. > + prologue push_ip=HAVE_PAC_LEAF > #ifndef STRCMP_NO_PRECHECK > ldrb r2, [src1] > ldrb r3, [src2] > @@ -136,16 +147,14 @@ def_fn strcmp > cmpcs r2, r3 > bne .Lfastpath_exit > #endif > - .cfi_sections .debug_frame > - .cfi_startproc > strd r4, r5, [sp, #-16]! > - .cfi_def_cfa_offset 16 > - .cfi_offset 4, -16 > - .cfi_offset 5, -12 > + .cfi_adjust_cfa_offset 16 > + .cfi_rel_offset 4, 0 > + .cfi_rel_offset 5, 4 > orr tmp1, src1, src2 > strd r6, r7, [sp, #8] > - .cfi_offset 6, -8 > - .cfi_offset 7, -4 > + .cfi_rel_offset 6, 8 > + .cfi_rel_offset 7, 12 > mvn const_m1, #0 > lsl r2, tmp1, #29 > cbz r2, .Lloop_aligned8 > @@ -270,7 +279,6 @@ def_fn strcmp > ldr data1, [src1], #4 > beq .Laligned_m2 > bcs .Laligned_m1 > - > #ifdef STRCMP_NO_PRECHECK > ldrb data2, [src2, #1] > uxtb tmp1, data1, ror #BYTE1_OFFSET > @@ -314,7 +322,8 @@ def_fn strcmp > mov result, tmp1 > ldr r4, [sp], #16 > .cfi_restore 4 > - bx lr > + .cfi_adjust_cfa_offset -16 > + epilogue push_ip=HAVE_PAC_LEAF > > #ifndef STRCMP_NO_PRECHECK > .Laligned_m1: > @@ -364,8 +373,9 @@ def_fn strcmp > /* R6/7 Not used in this sequence. */ > .cfi_restore 6 > .cfi_restore 7 > + .cfi_adjust_cfa_offset -16 > neg result, result > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > > 6: > .cfi_restore_state > @@ -441,7 +451,8 @@ def_fn strcmp > /* R6/7 not used in this sequence. */ > .cfi_restore 6 > .cfi_restore 7 > - bx lr > + .cfi_adjust_cfa_offset -16 > + epilogue push_ip=HAVE_PAC_LEAF > > .Lstrcmp_tail: > .cfi_restore_state > @@ -463,7 +474,10 @@ def_fn strcmp > /* R6/7 not used in this sequence. */ > .cfi_restore 6 > .cfi_restore 7 > + .cfi_adjust_cfa_offset -16 > sub result, result, data2, lsr #24 > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > .cfi_endproc > + .cantunwind > + .fnend > .size strcmp, . - .Lstrcmp_start_addr > diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S b/newlib/libc/machine/arm/strcmp-armv7m.S > index cdb4912df..825b6e77f 100644 > --- a/newlib/libc/machine/arm/strcmp-armv7m.S > +++ b/newlib/libc/machine/arm/strcmp-armv7m.S > @@ -29,6 +29,8 @@ > /* Very similar to the generic code, but uses Thumb2 as implemented > in ARMv7-M. */ > > +#include "arm_asm.h" > + > /* Parameters and result. */ > #define src1 r0 > #define src2 r1 > @@ -44,8 +46,10 @@ > .thumb > .syntax unified > def_fn strcmp > + .fnstart > .cfi_sections .debug_frame > .cfi_startproc > + prologue push_ip=HAVE_PAC_LEAF > eor tmp1, src1, src2 > tst tmp1, #3 > /* Strings not at same byte offset from a word boundary. */ > @@ -82,6 +86,7 @@ def_fn strcmp > ldreq data2, [src2], #4 > beq 4b > 2: > + .cfi_remember_state > /* There's a zero or a different byte in the word */ > S2HI result, data1, #24 > S2LO data1, data1, #8 > @@ -106,7 +111,7 @@ def_fn strcmp > lsrs result, result, #24 > subs result, result, data2 > #endif > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > > > #if 0 > @@ -205,8 +210,10 @@ def_fn strcmp > > /* First of all, compare bytes until src1(sp1) is word-aligned. */ > .Lstrcmp_unaligned: > + .cfi_restore_state > tst src1, #3 > beq 2f > + .cfi_remember_state > ldrb data1, [src1], #1 > ldrb data2, [src2], #1 > cmp data1, #1 > @@ -214,12 +221,13 @@ def_fn strcmp > cmpcs data1, data2 > beq .Lstrcmp_unaligned > sub result, data1, data2 > - bx lr > + epilogue push_ip=HAVE_PAC_LEAF > > 2: > + .cfi_restore_state > stmfd sp!, {r5} > - .cfi_def_cfa_offset 4 > - .cfi_offset 5, -4 > + .cfi_adjust_cfa_offset 4 > + .cfi_rel_offset 5, 0 > > ldr data1, [src1], #4 > and tmp2, src2, #3 > @@ -355,8 +363,8 @@ def_fn strcmp > .cfi_remember_state > ldmfd sp!, {r5} > .cfi_restore 5 > - .cfi_def_cfa_offset 0 > - bx lr > + .cfi_adjust_cfa_offset -4 > + epilogue push_ip=HAVE_PAC_LEAF > > .Lstrcmp_tail: > .cfi_restore_state > @@ -372,7 +380,9 @@ def_fn strcmp > sub result, r2, result > ldmfd sp!, {r5} > .cfi_restore 5 > - .cfi_def_cfa_offset 0 > - bx lr > + .cfi_adjust_cfa_offset -4 > + epilogue push_ip=HAVE_PAC_LEAF > .cfi_endproc > + .cantunwind > + .fnend > .size strcmp, . - strcmp