public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
From: Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
To: "Victor L. Do Nascimento" <victor.donascimento@arm.com>
Cc: newlib@sourceware.org, richard.earnshaw@arm.com
Subject: Re: [PATCH v2 2/8] newlib: libc: strcmp M-profile PACBTI-enablement
Date: Fri, 5 Aug 2022 15:34:39 +0100	[thread overview]
Message-ID: <a6508968-2426-d046-0f6c-1efb4c923d0c@foss.arm.com> (raw)
In-Reply-To: <yw8j8ro3x8sc.fsf@arm.com>



On 05/08/2022 11:51, Victor L. Do Nascimento wrote:
> Richard Earnshaw <Richard.Earnshaw@foss.arm.com> writes:
> 
>> On 03/08/2022 16:35, Victor Do Nascimento wrote:
>>> Add function prologue/epilogue to conditionally add BTI landing pads
>>> and/or PAC code generation & authentication instructions depending on
>>> compilation flags.
>>> This patch enables PACBTI for all relevant variants of strcmp:
>>>        * Newlib for armv8.1-m.main+pacbti
>>>        * Newlib for armv8.1-m.main+pacbti+mve
>>>        * Newlib-nano
>>> ---
>>>    newlib/libc/machine/arm/strcmp-arm-tiny.S |  7 +++-
>>>    newlib/libc/machine/arm/strcmp-armv7.S    | 45 +++++++++++++++++------
>>>    newlib/libc/machine/arm/strcmp-armv7m.S   | 36 +++++++++++++++---
>>>    3 files changed, 71 insertions(+), 17 deletions(-)
>>> diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>>> b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>>> index 607a41daf..8085eb4df 100644
>>> --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>>> +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>>> @@ -29,10 +29,14 @@
>>>    /* Tiny version of strcmp in ARM state.  Used only when optimizing
>>>       for size.  Also supports Thumb-2.  */
>>>    +#include "pacbti.h"
>>> +
>>>    	.syntax unified
>>>    def_fn strcmp
>>> +	.fnstart
>>>    	.cfi_sections .debug_frame
>>>    	.cfi_startproc
>>> +	pacbti_prologue
>>
>> See comment on patch1 about the names of these macros.
>>
>>>    1:
>>>    	ldrb	r2, [r0], #1
>>>    	ldrb	r3, [r1], #1
>>> @@ -42,6 +46,7 @@ def_fn strcmp
>>>    	beq	1b
>>>    2:
>>>    	subs	r0, r2, r3
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>    	.cfi_endproc
>>> +	.fnend
>>>    	.size	strcmp, . - strcmp
>>> diff --git a/newlib/libc/machine/arm/strcmp-armv7.S b/newlib/libc/machine/arm/strcmp-armv7.S
>>> index 2f93bfb73..584c89f7e 100644
>>> --- a/newlib/libc/machine/arm/strcmp-armv7.S
>>> +++ b/newlib/libc/machine/arm/strcmp-armv7.S
>>> @@ -45,6 +45,8 @@
>>>    	.thumb
>>>    	.syntax unified
>>>    +#include "pacbti.h"
>>> +
>>>    /* Parameters and result.  */
>>>    #define src1		r0
>>>    #define src2		r1
>>> @@ -91,8 +93,9 @@
>>>    	ldrd	r4, r5, [sp], #16
>>>    	.cfi_restore 4
>>>    	.cfi_restore 5
>>> +	.cfi_adjust_cfa_offset -16
>>
>> This looks like it's an omission from the original file and thus
>> should be separated out as a separate patch.
>>
>>>    	sub	result, result, r1, lsr #24
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>    #else
>>>    	/* To use the big-endian trick we'd have to reverse all three words.
>>>    	   that's slower than this approach.  */
>>> @@ -112,22 +115,28 @@
>>>    	ldrd	r4, r5, [sp], #16
>>>    	.cfi_restore 4
>>>    	.cfi_restore 5
>>> +	.cfi_adjust_cfa_offset -16
>>>    	sub	result, result, r1
>>>    -	bx	lr
>>> +	pacbti_epilogue
>>>    #endif
>>>    	.endm
>>>    +
>>>    	.text
>>>    	.p2align	5
>>> +	.fnstart
>>> +	.cfi_sections .debug_frame
>>> +	.cfi_startproc
>>>    .Lstrcmp_start_addr:
>>>    #ifndef STRCMP_NO_PRECHECK
>>>    .Lfastpath_exit:
>>>    	sub	r0, r2, r3
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>    	nop
>>>    #endif
>>>    def_fn	strcmp
>>> +	pacbti_prologue
>>>    #ifndef STRCMP_NO_PRECHECK
>>>    	ldrb	r2, [src1]
>>>    	ldrb	r3, [src2]
>>> @@ -136,16 +145,26 @@ def_fn	strcmp
>>>    	cmpcs	r2, r3
>>>    	bne	.Lfastpath_exit
>>>    #endif
>>> -	.cfi_sections .debug_frame
>>> -	.cfi_startproc
>>>    	strd	r4, r5, [sp, #-16]!
>>> -	.cfi_def_cfa_offset 16
>>> +	.save {r4, r5}
>>
>> Hmm, I've just remembered that leaf functions cannot throw exceptions
>> (the EHABI only supports synchronous exceptions), so I don't think we
>> need .save directives at all.  Instead, such functions should be
>> marked with ".cantunwind".  You still want the dwarf unwind
>> information as that's for the debugger, it's just the '.save'
>> directives that aren't needed.
>>
>>> +	.cfi_adjust_cfa_offset 16
>>> +#if __HAVE_PAC_LEAF
>>> +	.cfi_offset 4, -20
>>> +	.cfi_offset 5, -16
>>> +#else
>>>    	.cfi_offset 4, -16
>>>    	.cfi_offset 5, -12
>>> +#endif /* __HAVE_PAC_LEAF */
>>
>> Ugh!  This is not the right way to do this.  Instead (see comments on
>> patch 1) use a macro that defines the additional adjustment to use in
>> various contexts, something like
>>
>> 	.cfi_offset 4, -(16+PAC_UNALIGNED_CFI_ADJ)
>>
>>>    	orr	tmp1, src1, src2
>>>    	strd	r6, r7, [sp, #8]
>>> +	.save {r6, r7}
>>> +#if __HAVE_PAC_LEAF
>>> +	.cfi_offset 6, -12
>>> +	.cfi_offset 7, -8
>>> +#else
>>>    	.cfi_offset 6, -8
>>>    	.cfi_offset 7, -4
>>> +#endif /* __HAVE_PAC_LEAF */
>>>    	mvn	const_m1, #0
>>>    	lsl	r2, tmp1, #29
>>>    	cbz	r2, .Lloop_aligned8
>>> @@ -270,7 +289,6 @@ def_fn	strcmp
>>>    	ldr	data1, [src1], #4
>>>    	beq	.Laligned_m2
>>>    	bcs	.Laligned_m1
>>> -
>>>    #ifdef STRCMP_NO_PRECHECK
>>>    	ldrb	data2, [src2, #1]
>>>    	uxtb	tmp1, data1, ror #BYTE1_OFFSET
>>> @@ -314,7 +332,8 @@ def_fn	strcmp
>>>    	mov	result, tmp1
>>>    	ldr	r4, [sp], #16
>>>    	.cfi_restore 4
>>> -	bx	lr
>>> +	.cfi_adjust_cfa_offset -16
>>> +	pacbti_epilogue
>>>      #ifndef STRCMP_NO_PRECHECK
>>>    .Laligned_m1:
>>> @@ -364,8 +383,9 @@ def_fn	strcmp
>>>    	/* R6/7 Not used in this sequence.  */
>>>    	.cfi_restore 6
>>>    	.cfi_restore 7
>>> +	.cfi_adjust_cfa_offset -16
>>>    	neg	result, result
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>      6:
>>>    	.cfi_restore_state
>>> @@ -441,7 +461,8 @@ def_fn	strcmp
>>>    	/* R6/7 not used in this sequence.  */
>>>    	.cfi_restore 6
>>>    	.cfi_restore 7
>>> -	bx	lr
>>> +	.cfi_adjust_cfa_offset -16
>>> +	pacbti_epilogue
>>>      .Lstrcmp_tail:
>>>    	.cfi_restore_state
>>> @@ -463,7 +484,9 @@ def_fn	strcmp
>>>    	/* R6/7 not used in this sequence.  */
>>>    	.cfi_restore 6
>>>    	.cfi_restore 7
>>> +	.cfi_adjust_cfa_offset -16
>>>    	sub	result, result, data2, lsr #24
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>    	.cfi_endproc
>>> +	.fnend
>>>    	.size strcmp, . - .Lstrcmp_start_addr
>>> diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S b/newlib/libc/machine/arm/strcmp-armv7m.S
>>> index cdb4912df..fe1519f4d 100644
>>> --- a/newlib/libc/machine/arm/strcmp-armv7m.S
>>> +++ b/newlib/libc/machine/arm/strcmp-armv7m.S
>>> @@ -29,6 +29,8 @@
>>>    /* Very similar to the generic code, but uses Thumb2 as implemented
>>>       in ARMv7-M.  */
>>>    +#include "pacbti.h"
>>> +
>>>    /* Parameters and result.  */
>>>    #define src1		r0
>>>    #define src2		r1
>>> @@ -44,8 +46,10 @@
>>>    	.thumb
>>>    	.syntax unified
>>>    def_fn strcmp
>>> +	.fnstart
>>>    	.cfi_sections .debug_frame
>>>    	.cfi_startproc
>>> +	pacbti_prologue
>>>    	eor	tmp1, src1, src2
>>>    	tst	tmp1, #3
>>>    	/* Strings not at same byte offset from a word boundary.  */
>>> @@ -106,7 +110,7 @@ def_fn strcmp
>>>    	lsrs	result, result, #24
>>>    	subs	result, result, data2
>>>    #endif
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>        #if 0
>>> @@ -214,12 +218,18 @@ def_fn strcmp
>>>    	cmpcs	data1, data2
>>>    	beq	.Lstrcmp_unaligned
>>>    	sub	result, data1, data2
>>> -	bx	lr
>>> +	pacbti_epilogue
>>>      2:
>>>    	stmfd	sp!, {r5}
>>> -	.cfi_def_cfa_offset 4
>>> +	.save {r5}
>>> +	.cfi_adjust_cfa_offset 4
>>> +#if __HAVE_PAC_LEAF
>>> +	.cfi_offset 5, -8 /* Account for ip register already on stack.  */
>>> +#else
>>>    	.cfi_offset 5, -4
>>> +#endif /* __HAVE_PAC_LEAF */
>>> +
>>>      	ldr	data1, [src1], #4
>>>    	and	tmp2, src2, #3
>>> @@ -353,10 +363,17 @@ def_fn strcmp
>>>    .Lstrcmp_done_equal:
>>>    	mov	result, #0
>>>    	.cfi_remember_state
>>> +#if __HAVE_PAC_LEAF
>>> +	pop {r5, ip}
>>> +	.cfi_restore 5
>>> +	.cfi_restore 143
>>> +	aut ip, lr, sp
>>> +#else
>>>    	ldmfd	sp!, {r5}
>>>    	.cfi_restore 5
>>> +#endif /* __HAVE_PAC_LEAF */
>>
>> I think we could define a macro for this as well, something like
>>          pop_reg_and_pac r5
> 
> This sounds good, but a survey through relevant function epilogues
> reveals a series of different `pop' scenarios. On the one hand, we have
> pop {r5, ip}, as we have here but on another we also have, for example,
> pop {r4,r5,r6,r7,ip}.
> 
> Do we wish to provide a short-hand notation for one pop operation but
> not the other via macros? My initial understanding is that assembler
> macros don't provide us with the necessary flexibility to handle all
> relevant cases.
> 
> I think the use of a macro here would greatly enhance code readability
> and by extension maintainability, but at first sight it feels inelegant
> to provide a solution applicable only to a single case of a more general
> problem.
> 
> Any wisdom here would be appreciated.

Yes, this is all quite tricky.  Fortunately, the vast majority of time 
registers saved on the stack frame are consecutively numbered.  So I 
think it's possible to write some macros to handle this.  The example 
below is not complete (I'll leave you to flesh out all the different 
cases :), but I think it shows that we can achieve what's needed.

@ Emit .cfi_offset directives for a consecutive sequence of registers
	.macro cfisavelist first, last, count
	.cfi_offset \last, -4*(\count)
	.if \last-\first
	cfisavelist \first, \last-1, \count+1
	.endif
	.endm

@ Create a prologue entry sequence handling PAC/BTI, if required and 
emitting
@ CFI directives for 	
	.macro prologue first=-1, last=-1, savepac=1
	.if \first != -1
	.if \last != -1
	.if \savepac
	push {r\first-r\last, ip}
	.cfi_offset 143, -4
	cfisavelist \first, \last, 2
	.else
	push {r\first-r\last}
	cfisavelist \first, \last, 1
	.endif
	.else
	push {r\first}
	cfisavelist \first, \first, 1
	.endif
	.else
	...
	.endif
	.endm

	.text
	.global foo
	.type foo, %function
foo:
	.cfi_startproc
	prologue 4, 7, savepac=0
	.cfi_endproc

	.text
	.global bar
	.type bar, %function
bar:
	.cfi_startproc
	prologue 4, 7
	.cfi_endproc

	.text
	.global wom
	.type wom, %function
wom:
	.cfi_startproc
	prologue 4, savepac=0
	.cfi_endproc
	
	.text
	.global bat
	.type bat, %function
bat:
	.cfi_startproc
	prologue savepac=0
	.cfi_endproc

R.
> 
> V.
> 
>> While you're doing that, please change the single-register ldmfd into 'pop'.
>>
>>>    	.cfi_def_cfa_offset 0
>>> -	bx	lr
>>> +	bx lr
>>
>> stray change to the BX instruction.
>>
>>>      .Lstrcmp_tail:
>>>    	.cfi_restore_state
>>> @@ -370,9 +387,18 @@ def_fn strcmp
>>>    	S2LOEQ	data2, data2, #8
>>>    	beq	.Lstrcmp_tail
>>>    	sub	result, r2, result
>>> +#if __HAVE_PAC_LEAF
>>> +	pop {r5, ip}
>>> +	.cfi_restore 5
>>> +	.cfi_restore 143
>>> +	.cfi_def_cfa_offset 0
>>> +	aut ip, lr, sp
>>> +#else
>>>    	ldmfd	sp!, {r5}
>>>    	.cfi_restore 5
>>>    	.cfi_def_cfa_offset 0
>>> -	bx	lr
>>> +#endif /* __HAVE_PAC_LEAF */
>>
>> Same as above
>>
>>> +	bx lr
>>
>> tab between mnemonic and operands.
>>
>>>    	.cfi_endproc
>>> +	.fnend
>>>    	.size strcmp, . - strcmp
>>
>> R.

  reply	other threads:[~2022-08-05 14:34 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-03 15:35 [PATCH v2 0/8] Implement assembly cortex-M PACBTI functionality Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 1/8] newlib: libc: define M-profile PACBTI-enablement macros Victor Do Nascimento
2022-08-04 15:19   ` Richard Earnshaw
2022-08-05  9:21     ` Victor L. Do Nascimento
2022-08-16 16:11     ` Victor L. Do Nascimento
2022-08-18 16:28       ` Victor L. Do Nascimento
2022-08-03 15:35 ` [PATCH v2 2/8] newlib: libc: strcmp M-profile PACBTI-enablement Victor Do Nascimento
2022-08-04 15:48   ` Richard Earnshaw
2022-08-05 10:51     ` Victor L. Do Nascimento
2022-08-05 14:34       ` Richard Earnshaw [this message]
2022-08-05 14:58         ` Victor L. Do Nascimento
2022-08-05 15:15           ` Richard Earnshaw
2022-08-03 15:35 ` [PATCH v2 3/8] newlib: libc: strlen " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 4/8] newlib: libc: memchr " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 5/8] newlib: libc: memcpy " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 6/8] newlib: libc: setjmp/longjmp " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 7/8] newlib: libc: aeabi_memmove " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 8/8] newlib: libc: aeabi_memset " Victor Do Nascimento

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a6508968-2426-d046-0f6c-1efb4c923d0c@foss.arm.com \
    --to=richard.earnshaw@foss.arm.com \
    --cc=newlib@sourceware.org \
    --cc=richard.earnshaw@arm.com \
    --cc=victor.donascimento@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).