public inbox for newlib@sourceware.org
 help / color / mirror / Atom feed
From: Richard Earnshaw <Richard.Earnshaw@foss.arm.com>
To: "Victor L. Do Nascimento" <victor.donascimento@arm.com>,
	newlib@sourceware.org
Cc: Richard.Earnshaw@arm.com
Subject: Re: [PATCH v4 2/8] newlib: libc: strcmp M-profile PACBTI-enablement
Date: Tue, 22 Nov 2022 15:04:07 +0000	[thread overview]
Message-ID: <a764b9c6-c6fc-4162-851d-c6a9132c9cdc@foss.arm.com> (raw)
In-Reply-To: <yw8j35balsde.fsf@arm.com>



On 26/10/2022 12:46, Victor L. Do Nascimento wrote:
> Add function prologue/epilogue to conditionally add BTI landing pads
> and/or PAC code generation & authentication instructions depending on
> compilation flags.
> 
> This patch enables PACBTI for all relevant variants of strcmp:
>       * Newlib for armv8.1-m.main+pacbti
>       * Newlib for armv8.1-m.main+pacbti+mve
>       * Newlib-nano
> ---
>   newlib/libc/machine/arm/strcmp-arm-tiny.S |  8 ++++-
>   newlib/libc/machine/arm/strcmp-armv7.S    | 44 +++++++++++++++--------
>   newlib/libc/machine/arm/strcmp-armv7m.S   | 26 +++++++++-----
>   3 files changed, 54 insertions(+), 24 deletions(-)
> 
> diff --git a/newlib/libc/machine/arm/strcmp-arm-tiny.S b/newlib/libc/machine/arm/strcmp-arm-tiny.S
> index 607a41daf..0bd2a2e6e 100644
> --- a/newlib/libc/machine/arm/strcmp-arm-tiny.S
> +++ b/newlib/libc/machine/arm/strcmp-arm-tiny.S
> @@ -29,10 +29,14 @@
>   /* Tiny version of strcmp in ARM state.  Used only when optimizing
>      for size.  Also supports Thumb-2.  */
>   
> +#include "arm_asm.h"
> +
>   	.syntax unified
>   def_fn strcmp
> +	.fnstart
>   	.cfi_sections .debug_frame
>   	.cfi_startproc
> +	prologue
>   1:
>   	ldrb	r2, [r0], #1
>   	ldrb	r3, [r1], #1
> @@ -42,6 +46,8 @@ def_fn strcmp
>   	beq	1b
>   2:
>   	subs	r0, r2, r3
> -	bx	lr
> +	epilogue
>   	.cfi_endproc
> +	.cantunwind
> +	.fnend
>   	.size	strcmp, . - strcmp
> diff --git a/newlib/libc/machine/arm/strcmp-armv7.S b/newlib/libc/machine/arm/strcmp-armv7.S
> index 2f93bfb73..26ba579ae 100644
> --- a/newlib/libc/machine/arm/strcmp-armv7.S
> +++ b/newlib/libc/machine/arm/strcmp-armv7.S
> @@ -45,6 +45,8 @@
>   	.thumb
>   	.syntax unified
>   
> +#include "arm_asm.h"
> +
>   /* Parameters and result.  */
>   #define src1		r0
>   #define src2		r1
> @@ -91,8 +93,9 @@
>   	ldrd	r4, r5, [sp], #16
>   	.cfi_restore 4
>   	.cfi_restore 5
> +	.cfi_adjust_cfa_offset -16
>   	sub	result, result, r1, lsr #24
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   #else
>   	/* To use the big-endian trick we'd have to reverse all three words.
>   	   that's slower than this approach.  */
> @@ -112,22 +115,30 @@
>   	ldrd	r4, r5, [sp], #16
>   	.cfi_restore 4
>   	.cfi_restore 5
> +	.cfi_adjust_cfa_offset -16
>   	sub	result, result, r1
>   
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   #endif
>   	.endm
>   
> +
>   	.text
>   	.p2align	5
> +	.fnstart
> +	.cfi_sections .debug_frame
> +	.cfi_startproc
>   .Lstrcmp_start_addr:
>   #ifndef STRCMP_NO_PRECHECK
>   .Lfastpath_exit:
> +	.cfi_remember_state
>   	sub	r0, r2, r3
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   	nop
>   #endif
>   def_fn	strcmp
> +	.cfi_restore_state

I guess this is supposed to match the remember state above, but that's 
inside an ifdef and this isn't, so they are unbalanced.

I think it would be better to move the fastpath_exit code further down, 
perhaps below the first epilogue sequence.  Then we can clean up the 
entry part and make it more natural (in particular we can simplify the 
.size directive calculation).

The rest of this patch is OK.

R.


> +	prologue push_ip=HAVE_PAC_LEAF
>   #ifndef STRCMP_NO_PRECHECK
>   	ldrb	r2, [src1]
>   	ldrb	r3, [src2]
> @@ -136,16 +147,14 @@ def_fn	strcmp
>   	cmpcs	r2, r3
>   	bne	.Lfastpath_exit
>   #endif
> -	.cfi_sections .debug_frame
> -	.cfi_startproc
>   	strd	r4, r5, [sp, #-16]!
> -	.cfi_def_cfa_offset 16
> -	.cfi_offset 4, -16
> -	.cfi_offset 5, -12
> +	.cfi_adjust_cfa_offset 16
> +	.cfi_rel_offset 4, 0
> +	.cfi_rel_offset 5, 4
>   	orr	tmp1, src1, src2
>   	strd	r6, r7, [sp, #8]
> -	.cfi_offset 6, -8
> -	.cfi_offset 7, -4
> +	.cfi_rel_offset 6, 8
> +	.cfi_rel_offset 7, 12
>   	mvn	const_m1, #0
>   	lsl	r2, tmp1, #29
>   	cbz	r2, .Lloop_aligned8
> @@ -270,7 +279,6 @@ def_fn	strcmp
>   	ldr	data1, [src1], #4
>   	beq	.Laligned_m2
>   	bcs	.Laligned_m1
> -
>   #ifdef STRCMP_NO_PRECHECK
>   	ldrb	data2, [src2, #1]
>   	uxtb	tmp1, data1, ror #BYTE1_OFFSET
> @@ -314,7 +322,8 @@ def_fn	strcmp
>   	mov	result, tmp1
>   	ldr	r4, [sp], #16
>   	.cfi_restore 4
> -	bx	lr
> +	.cfi_adjust_cfa_offset -16
> +	epilogue push_ip=HAVE_PAC_LEAF
>   
>   #ifndef STRCMP_NO_PRECHECK
>   .Laligned_m1:
> @@ -364,8 +373,9 @@ def_fn	strcmp
>   	/* R6/7 Not used in this sequence.  */
>   	.cfi_restore 6
>   	.cfi_restore 7
> +	.cfi_adjust_cfa_offset -16
>   	neg	result, result
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   
>   6:
>   	.cfi_restore_state
> @@ -441,7 +451,8 @@ def_fn	strcmp
>   	/* R6/7 not used in this sequence.  */
>   	.cfi_restore 6
>   	.cfi_restore 7
> -	bx	lr
> +	.cfi_adjust_cfa_offset -16
> +	epilogue push_ip=HAVE_PAC_LEAF
>   
>   .Lstrcmp_tail:
>   	.cfi_restore_state
> @@ -463,7 +474,10 @@ def_fn	strcmp
>   	/* R6/7 not used in this sequence.  */
>   	.cfi_restore 6
>   	.cfi_restore 7
> +	.cfi_adjust_cfa_offset -16
>   	sub	result, result, data2, lsr #24
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   	.cfi_endproc
> +	.cantunwind
> +	.fnend
>   	.size strcmp, . - .Lstrcmp_start_addr
> diff --git a/newlib/libc/machine/arm/strcmp-armv7m.S b/newlib/libc/machine/arm/strcmp-armv7m.S
> index cdb4912df..825b6e77f 100644
> --- a/newlib/libc/machine/arm/strcmp-armv7m.S
> +++ b/newlib/libc/machine/arm/strcmp-armv7m.S
> @@ -29,6 +29,8 @@
>   /* Very similar to the generic code, but uses Thumb2 as implemented
>      in ARMv7-M.  */
>   
> +#include "arm_asm.h"
> +
>   /* Parameters and result.  */
>   #define src1		r0
>   #define src2		r1
> @@ -44,8 +46,10 @@
>   	.thumb
>   	.syntax unified
>   def_fn strcmp
> +	.fnstart
>   	.cfi_sections .debug_frame
>   	.cfi_startproc
> +	prologue push_ip=HAVE_PAC_LEAF
>   	eor	tmp1, src1, src2
>   	tst	tmp1, #3
>   	/* Strings not at same byte offset from a word boundary.  */
> @@ -82,6 +86,7 @@ def_fn strcmp
>   	ldreq	data2, [src2], #4
>   	beq	4b
>   2:
> +	.cfi_remember_state
>   	/* There's a zero or a different byte in the word */
>   	S2HI	result, data1, #24
>   	S2LO	data1, data1, #8
> @@ -106,7 +111,7 @@ def_fn strcmp
>   	lsrs	result, result, #24
>   	subs	result, result, data2
>   #endif
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   
>   
>   #if 0
> @@ -205,8 +210,10 @@ def_fn strcmp
>   
>   	/* First of all, compare bytes until src1(sp1) is word-aligned. */
>   .Lstrcmp_unaligned:
> +	.cfi_restore_state
>   	tst	src1, #3
>   	beq	2f
> +	.cfi_remember_state
>   	ldrb	data1, [src1], #1
>   	ldrb	data2, [src2], #1
>   	cmp	data1, #1
> @@ -214,12 +221,13 @@ def_fn strcmp
>   	cmpcs	data1, data2
>   	beq	.Lstrcmp_unaligned
>   	sub	result, data1, data2
> -	bx	lr
> +	epilogue push_ip=HAVE_PAC_LEAF
>   
>   2:
> +	.cfi_restore_state
>   	stmfd	sp!, {r5}
> -	.cfi_def_cfa_offset 4
> -	.cfi_offset 5, -4
> +	.cfi_adjust_cfa_offset 4
> +	.cfi_rel_offset 5, 0
>   
>   	ldr	data1, [src1], #4
>   	and	tmp2, src2, #3
> @@ -355,8 +363,8 @@ def_fn strcmp
>   	.cfi_remember_state
>   	ldmfd	sp!, {r5}
>   	.cfi_restore 5
> -	.cfi_def_cfa_offset 0
> -	bx	lr
> +	.cfi_adjust_cfa_offset -4
> +	epilogue push_ip=HAVE_PAC_LEAF
>   
>   .Lstrcmp_tail:
>   	.cfi_restore_state
> @@ -372,7 +380,9 @@ def_fn strcmp
>   	sub	result, r2, result
>   	ldmfd	sp!, {r5}
>   	.cfi_restore 5
> -	.cfi_def_cfa_offset 0
> -	bx	lr
> +	.cfi_adjust_cfa_offset -4
> +	epilogue push_ip=HAVE_PAC_LEAF
>   	.cfi_endproc
> +	.cantunwind
> +	.fnend
>   	.size strcmp, . - strcmp

  reply	other threads:[~2022-11-22 15:04 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-26 11:37 [PATCH v4 0/8] Implement assembly cortex-M PACBTI functionality Victor L. Do Nascimento
2022-10-26 11:45 ` [PATCH v4 1/8] newlib: libc: define M-profile PACBTI-enablement macros Victor L. Do Nascimento
2022-11-22 15:04   ` Richard Earnshaw
2022-10-26 11:46 ` [PATCH v4 2/8] newlib: libc: strcmp M-profile PACBTI-enablement Victor L. Do Nascimento
2022-11-22 15:04   ` Richard Earnshaw [this message]
2022-10-26 11:47 ` [PATCH v4 3/8] newlib: libc: strlen " Victor L. Do Nascimento
2022-11-22 15:20   ` Richard Earnshaw
2022-10-26 11:49 ` [PATCH v4 4/8] newlib: libc: memchr " Victor L. Do Nascimento
2022-11-22 15:33   ` Richard Earnshaw
2022-10-26 11:50 ` [PATCH v4 5/8] newlib: libc: memcpy " Victor L. Do Nascimento
2022-11-22 16:03   ` Richard Earnshaw
2022-10-26 11:51 ` [PATCH v4 6/8] newlib: libc: setjmp/longjmp " Victor L. Do Nascimento
2022-11-22 16:17   ` Richard Earnshaw
2022-10-26 11:52 ` [PATCH v4 7/8] newlib: libc: aeabi_memmove " Victor L. Do Nascimento
2022-11-22 16:18   ` Richard Earnshaw
2022-10-26 11:53 ` [PATCH v4 8/8] newlib: libc: aeabi_memset " Victor L. Do Nascimento
2022-11-22 16:19   ` Richard Earnshaw

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a764b9c6-c6fc-4162-851d-c6a9132c9cdc@foss.arm.com \
    --to=richard.earnshaw@foss.arm.com \
    --cc=Richard.Earnshaw@arm.com \
    --cc=newlib@sourceware.org \
    --cc=victor.donascimento@arm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).