public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
From: "Victor L. Do Nascimento" <victor.donascimento@arm.com>
To: Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
Cc: <newlib@sourceware.org>, <richard.earnshaw@arm.com>
Subject: Re: [PATCH v2 2/8] newlib: libc: strcmp M-profile PACBTI-enablement
Date: Fri, 5 Aug 2022 11:51:47 +0100	[thread overview]
Message-ID: <yw8j8ro3x8sc.fsf@arm.com> (raw)
In-Reply-To: <e2f801e4-b7f4-726d-d65a-36aaa902ed7a@foss.arm.com> (Richard Earnshaw's message of "Thu, 4 Aug 2022 16:48:08 +0100")

Richard Earnshaw <Richard.Earnshaw@foss.arm.com> writes:

> On 03/08/2022 16:35, Victor Do Nascimento wrote:
>> Add function prologue/epilogue to conditionally add BTI landing pads
>> and/or PAC code generation & authentication instructions depending on
>> compilation flags.
>> This patch enables PACBTI for all relevant variants of strcmp:
>>       * Newlib for armv8.1-m.main+pacbti
>>       * Newlib for armv8.1-m.main+pacbti+mve
>>       * Newlib-nano
>> ---
>>   newlib/libc/machine/arm/strcmp-arm-tiny.S |  7 +++-
>>   newlib/libc/machine/arm/strcmp-armv7.S    | 45 +++++++++++++++++------
>>   newlib/libc/machine/arm/strcmp-armv7m.S   | 36 +++++++++++++++---
>>   3 files changed, 71 insertions(+), 17 deletions(-)
>> diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> index 607a41daf..8085eb4df 100644
>> --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S
>> @@ -29,10 +29,14 @@
>>   /* Tiny version of strcmp in ARM state.  Used only when optimizing
>>      for size.  Also supports Thumb-2.  */
>>   +#include "pacbti.h"
>> +
>>   	.syntax unified
>>   def_fn strcmp
>> +	.fnstart
>>   	.cfi_sections .debug_frame
>>   	.cfi_startproc
>> +	pacbti_prologue
>
> See comment on patch1 about the names of these macros.
>
>>   1:
>>   	ldrb	r2, [r0], #1
>>   	ldrb	r3, [r1], #1
>> @@ -42,6 +46,7 @@ def_fn strcmp
>>   	beq	1b
>>   2:
>>   	subs	r0, r2, r3
>> -	bx	lr
>> +	pacbti_epilogue
>>   	.cfi_endproc
>> +	.fnend
>>   	.size	strcmp, . - strcmp
>> diff --git a/newlib/libc/machine/arm/strcmp-armv7.S b/newlib/libc/machine/arm/strcmp-armv7.S
>> index 2f93bfb73..584c89f7e 100644
>> --- a/newlib/libc/machine/arm/strcmp-armv7.S
>> +++ b/newlib/libc/machine/arm/strcmp-armv7.S
>> @@ -45,6 +45,8 @@
>>   	.thumb
>>   	.syntax unified
>>   +#include "pacbti.h"
>> +
>>   /* Parameters and result.  */
>>   #define src1		r0
>>   #define src2		r1
>> @@ -91,8 +93,9 @@
>>   	ldrd	r4, r5, [sp], #16
>>   	.cfi_restore 4
>>   	.cfi_restore 5
>> +	.cfi_adjust_cfa_offset -16
>
> This looks like it's an omission from the original file and thus
> should be separated out as a separate patch.
>
>>   	sub	result, result, r1, lsr #24
>> -	bx	lr
>> +	pacbti_epilogue
>>   #else
>>   	/* To use the big-endian trick we'd have to reverse all three words.
>>   	   that's slower than this approach.  */
>> @@ -112,22 +115,28 @@
>>   	ldrd	r4, r5, [sp], #16
>>   	.cfi_restore 4
>>   	.cfi_restore 5
>> +	.cfi_adjust_cfa_offset -16
>>   	sub	result, result, r1
>>   -	bx	lr
>> +	pacbti_epilogue
>>   #endif
>>   	.endm
>>   +
>>   	.text
>>   	.p2align	5
>> +	.fnstart
>> +	.cfi_sections .debug_frame
>> +	.cfi_startproc
>>   .Lstrcmp_start_addr:
>>   #ifndef STRCMP_NO_PRECHECK
>>   .Lfastpath_exit:
>>   	sub	r0, r2, r3
>> -	bx	lr
>> +	pacbti_epilogue
>>   	nop
>>   #endif
>>   def_fn	strcmp
>> +	pacbti_prologue
>>   #ifndef STRCMP_NO_PRECHECK
>>   	ldrb	r2, [src1]
>>   	ldrb	r3, [src2]
>> @@ -136,16 +145,26 @@ def_fn	strcmp
>>   	cmpcs	r2, r3
>>   	bne	.Lfastpath_exit
>>   #endif
>> -	.cfi_sections .debug_frame
>> -	.cfi_startproc
>>   	strd	r4, r5, [sp, #-16]!
>> -	.cfi_def_cfa_offset 16
>> +	.save {r4, r5}
>
> Hmm, I've just remembered that leaf functions cannot throw exceptions
> (the EHABI only supports synchronous exceptions), so I don't think we
> need .save directives at all.  Instead, such functions should be
> marked with ".cantunwind".  You still want the dwarf unwind
> information as that's for the debugger, it's just the '.save'
> directives that aren't needed.
>
>> +	.cfi_adjust_cfa_offset 16
>> +#if __HAVE_PAC_LEAF
>> +	.cfi_offset 4, -20
>> +	.cfi_offset 5, -16
>> +#else
>>   	.cfi_offset 4, -16
>>   	.cfi_offset 5, -12
>> +#endif /* __HAVE_PAC_LEAF */
>
> Ugh!  This is not the right way to do this.  Instead (see comments on
> patch 1) use a macro that defines the additional adjustment to use in
> various contexts, something like
>
> 	.cfi_offset 4, -(16+PAC_UNALIGNED_CFI_ADJ)
>
>>   	orr	tmp1, src1, src2
>>   	strd	r6, r7, [sp, #8]
>> +	.save {r6, r7}
>> +#if __HAVE_PAC_LEAF
>> +	.cfi_offset 6, -12
>> +	.cfi_offset 7, -8
>> +#else
>>   	.cfi_offset 6, -8
>>   	.cfi_offset 7, -4
>> +#endif /* __HAVE_PAC_LEAF */
>>   	mvn	const_m1, #0
>>   	lsl	r2, tmp1, #29
>>   	cbz	r2, .Lloop_aligned8
>> @@ -270,7 +289,6 @@ def_fn	strcmp
>>   	ldr	data1, [src1], #4
>>   	beq	.Laligned_m2
>>   	bcs	.Laligned_m1
>> -
>>   #ifdef STRCMP_NO_PRECHECK
>>   	ldrb	data2, [src2, #1]
>>   	uxtb	tmp1, data1, ror #BYTE1_OFFSET
>> @@ -314,7 +332,8 @@ def_fn	strcmp
>>   	mov	result, tmp1
>>   	ldr	r4, [sp], #16
>>   	.cfi_restore 4
>> -	bx	lr
>> +	.cfi_adjust_cfa_offset -16
>> +	pacbti_epilogue
>>     #ifndef STRCMP_NO_PRECHECK
>>   .Laligned_m1:
>> @@ -364,8 +383,9 @@ def_fn	strcmp
>>   	/* R6/7 Not used in this sequence.  */
>>   	.cfi_restore 6
>>   	.cfi_restore 7
>> +	.cfi_adjust_cfa_offset -16
>>   	neg	result, result
>> -	bx	lr
>> +	pacbti_epilogue
>>     6:
>>   	.cfi_restore_state
>> @@ -441,7 +461,8 @@ def_fn	strcmp
>>   	/* R6/7 not used in this sequence.  */
>>   	.cfi_restore 6
>>   	.cfi_restore 7
>> -	bx	lr
>> +	.cfi_adjust_cfa_offset -16
>> +	pacbti_epilogue
>>     .Lstrcmp_tail:
>>   	.cfi_restore_state
>> @@ -463,7 +484,9 @@ def_fn	strcmp
>>   	/* R6/7 not used in this sequence.  */
>>   	.cfi_restore 6
>>   	.cfi_restore 7
>> +	.cfi_adjust_cfa_offset -16
>>   	sub	result, result, data2, lsr #24
>> -	bx	lr
>> +	pacbti_epilogue
>>   	.cfi_endproc
>> +	.fnend
>>   	.size strcmp, . - .Lstrcmp_start_addr
>> diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S b/newlib/libc/machine/arm/strcmp-armv7m.S
>> index cdb4912df..fe1519f4d 100644
>> --- a/newlib/libc/machine/arm/strcmp-armv7m.S
>> +++ b/newlib/libc/machine/arm/strcmp-armv7m.S
>> @@ -29,6 +29,8 @@
>>   /* Very similar to the generic code, but uses Thumb2 as implemented
>>      in ARMv7-M.  */
>>   +#include "pacbti.h"
>> +
>>   /* Parameters and result.  */
>>   #define src1		r0
>>   #define src2		r1
>> @@ -44,8 +46,10 @@
>>   	.thumb
>>   	.syntax unified
>>   def_fn strcmp
>> +	.fnstart
>>   	.cfi_sections .debug_frame
>>   	.cfi_startproc
>> +	pacbti_prologue
>>   	eor	tmp1, src1, src2
>>   	tst	tmp1, #3
>>   	/* Strings not at same byte offset from a word boundary.  */
>> @@ -106,7 +110,7 @@ def_fn strcmp
>>   	lsrs	result, result, #24
>>   	subs	result, result, data2
>>   #endif
>> -	bx	lr
>> +	pacbti_epilogue
>>       #if 0
>> @@ -214,12 +218,18 @@ def_fn strcmp
>>   	cmpcs	data1, data2
>>   	beq	.Lstrcmp_unaligned
>>   	sub	result, data1, data2
>> -	bx	lr
>> +	pacbti_epilogue
>>     2:
>>   	stmfd	sp!, {r5}
>> -	.cfi_def_cfa_offset 4
>> +	.save {r5}
>> +	.cfi_adjust_cfa_offset 4
>> +#if __HAVE_PAC_LEAF
>> +	.cfi_offset 5, -8 /* Account for ip register already on stack.  */
>> +#else
>>   	.cfi_offset 5, -4
>> +#endif /* __HAVE_PAC_LEAF */
>> +
>>     	ldr	data1, [src1], #4
>>   	and	tmp2, src2, #3
>> @@ -353,10 +363,17 @@ def_fn strcmp
>>   .Lstrcmp_done_equal:
>>   	mov	result, #0
>>   	.cfi_remember_state
>> +#if __HAVE_PAC_LEAF
>> +	pop {r5, ip}
>> +	.cfi_restore 5
>> +	.cfi_restore 143
>> +	aut ip, lr, sp
>> +#else
>>   	ldmfd	sp!, {r5}
>>   	.cfi_restore 5
>> +#endif /* __HAVE_PAC_LEAF */
>
> I think we could define a macro for this as well, something like
>         pop_reg_and_pac r5

This sounds good, but a survey through relevant function epilogues
reveals a series of different `pop' scenarios. On the one hand, we have
pop {r5, ip}, as we have here but on another we also have, for example,
pop {r4,r5,r6,r7,ip}.

Do we wish to provide a short-hand notation for one pop operation but
not the other via macros? My initial understanding is that assembler
macros don't provide us with the necessary flexibility to handle all
relevant cases.

I think the use of a macro here would greatly enhance code readability
and by extension maintainability, but at first sight it feels inelegant
to provide a solution applicable only to a single case of a more general
problem.

Any wisdom here would be appreciated.

V.

> While you're doing that, please change the single-register ldmfd into 'pop'.
>
>>   	.cfi_def_cfa_offset 0
>> -	bx	lr
>> +	bx lr
>
> stray change to the BX instruction.
>
>>     .Lstrcmp_tail:
>>   	.cfi_restore_state
>> @@ -370,9 +387,18 @@ def_fn strcmp
>>   	S2LOEQ	data2, data2, #8
>>   	beq	.Lstrcmp_tail
>>   	sub	result, r2, result
>> +#if __HAVE_PAC_LEAF
>> +	pop {r5, ip}
>> +	.cfi_restore 5
>> +	.cfi_restore 143
>> +	.cfi_def_cfa_offset 0
>> +	aut ip, lr, sp
>> +#else
>>   	ldmfd	sp!, {r5}
>>   	.cfi_restore 5
>>   	.cfi_def_cfa_offset 0
>> -	bx	lr
>> +#endif /* __HAVE_PAC_LEAF */
>
> Same as above
>
>> +	bx lr
>
> tab between mnemonic and operands.
>
>>   	.cfi_endproc
>> +	.fnend
>>   	.size strcmp, . - strcmp
>
> R.

  reply	other threads:[~2022-08-05 10:52 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-03 15:35 [PATCH v2 0/8] Implement assembly cortex-M PACBTI functionality Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 1/8] newlib: libc: define M-profile PACBTI-enablement macros Victor Do Nascimento
2022-08-04 15:19   ` Richard Earnshaw
2022-08-05  9:21     ` Victor L. Do Nascimento
2022-08-16 16:11     ` Victor L. Do Nascimento
2022-08-18 16:28       ` Victor L. Do Nascimento
2022-08-03 15:35 ` [PATCH v2 2/8] newlib: libc: strcmp M-profile PACBTI-enablement Victor Do Nascimento
2022-08-04 15:48   ` Richard Earnshaw
2022-08-05 10:51     ` Victor L. Do Nascimento [this message]
2022-08-05 14:34       ` Richard Earnshaw
2022-08-05 14:58         ` Victor L. Do Nascimento
2022-08-05 15:15           ` Richard Earnshaw
2022-08-03 15:35 ` [PATCH v2 3/8] newlib: libc: strlen " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 4/8] newlib: libc: memchr " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 5/8] newlib: libc: memcpy " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 6/8] newlib: libc: setjmp/longjmp " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 7/8] newlib: libc: aeabi_memmove " Victor Do Nascimento
2022-08-03 15:35 ` [PATCH v2 8/8] newlib: libc: aeabi_memset " Victor Do Nascimento

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=yw8j8ro3x8sc.fsf@arm.com \
    --to=victor.donascimento@arm.com \
    --cc=Richard.Earnshaw@foss.arm.com \
    --cc=newlib@sourceware.org \
    --cc=richard.earnshaw@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).