public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Fix runtime linker auditing on aarch64
@ 2020-09-23  1:16 Ben Woodard
  2020-09-23 12:22 ` Florian Weimer
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Ben Woodard @ 2020-09-23  1:16 UTC (permalink / raw)
  To: libc-alpha

The dynamic linker's auditing was not working on aarch64. See PR#26643
https://sourceware.org/bugzilla/show_bug.cgi?id=26643

There were two distinct problems:
  * _dl_runtime_resolve was not preserving x8 the indirect result location
    register.
  * The NEON Q registers pushed onto the stack by _dl_runtime_resolve
    were twice the size of D registers extracted from the stack frame by
    _dl_runtime_profile.

To fix this
  * The La_aarch64_regs structure was expanded to include x8 and the full
    sized NEON V registers that are required to be preserved by the ABI.
  * _dl_runtime_profile needed to extract registers saved by
    _dl_runtime_resolve and put them into the new correctly sized
    La_aarch64_regs structure.
  * The return value structure La_aarch64_retval also didn't have the correctly
    sized NEON V registers.

As a couple of additional cleanups
  * The names of the NEON registers saved within the La_aarch64_regs and the
    La_aarch_retval structures referred to the old D registers which were
    doubles. Now the registers are quads and are called V for vector registers.
    So the name of the field in the structure and the names of the offsets
    within that structure were named to use the more modern names.
  * The ABI specification says that r0-r7 + r8 the indirect result location
    register as well as the NEON v0-v7 registers can be used to return values
    from a function. Therefore, I addded those to the La_aarch64_retval
    structure so that it also correctly matches the ABI.

An additional problem not addressed by this patch is what to do about the
changes to the aarch64 ABI needed to support SVE. A discussion about what to
do about that was begun on libc-alpha here:
https://sourceware.org/pipermail/libc-alpha/2020-September/117797.html
---
 sysdeps/aarch64/bits/link.h     | 17 ++++----
 sysdeps/aarch64/dl-link.sym     |  4 +-
 sysdeps/aarch64/dl-trampoline.S | 75 +++++++++++++++++++++------------
 3 files changed, 59 insertions(+), 37 deletions(-)

diff --git a/sysdeps/aarch64/bits/link.h b/sysdeps/aarch64/bits/link.h
index 0c54e6ea7b..2b43ace57c 100644
--- a/sysdeps/aarch64/bits/link.h
+++ b/sysdeps/aarch64/bits/link.h
@@ -23,19 +23,20 @@
 /* Registers for entry into PLT on AArch64.  */
 typedef struct La_aarch64_regs
 {
-  uint64_t lr_xreg[8];
-  uint64_t lr_dreg[8];
-  uint64_t lr_sp;
-  uint64_t lr_lr;
+  uint64_t    lr_xreg[9];
+  __uint128_t lr_vreg[8];
+  uint64_t    lr_sp;
+  uint64_t    lr_lr;
 } La_aarch64_regs;
 
 /* Return values for calls from PLT on AArch64.  */
 typedef struct La_aarch64_retval
 {
-  /* Up to two integer registers can be used for a return value.  */
-  uint64_t lrv_xreg[2];
-  /* Up to four D registers can be used for a return value.  */
-  uint64_t lrv_dreg[4];
+  /* Up to eight integer registers and the indirect result location register
+     can be used for a return value.  */
+  uint64_t    lrv_xreg[9];
+  /* Up to eight V registers can be used for a return value.  */
+  __uint128_t lrv_vreg[8];
 
 } La_aarch64_retval;
 __BEGIN_DECLS
diff --git a/sysdeps/aarch64/dl-link.sym b/sysdeps/aarch64/dl-link.sym
index d67d28b40c..70d153a1d5 100644
--- a/sysdeps/aarch64/dl-link.sym
+++ b/sysdeps/aarch64/dl-link.sym
@@ -7,9 +7,9 @@ DL_SIZEOF_RG		sizeof(struct La_aarch64_regs)
 DL_SIZEOF_RV		sizeof(struct La_aarch64_retval)
 
 DL_OFFSET_RG_X0		offsetof(struct La_aarch64_regs, lr_xreg)
-DL_OFFSET_RG_D0		offsetof(struct La_aarch64_regs, lr_dreg)
+DL_OFFSET_RG_V0		offsetof(struct La_aarch64_regs, lr_vreg)
 DL_OFFSET_RG_SP		offsetof(struct La_aarch64_regs, lr_sp)
 DL_OFFSET_RG_LR		offsetof(struct La_aarch64_regs, lr_lr)
 
 DL_OFFSET_RV_X0		offsetof(struct La_aarch64_retval, lrv_xreg)
-DL_OFFSET_RV_D0		offsetof(struct La_aarch64_retval, lrv_dreg)
+DL_OFFSET_RV_V0		offsetof(struct La_aarch64_retval, lrv_vreg)
diff --git a/sysdeps/aarch64/dl-trampoline.S b/sysdeps/aarch64/dl-trampoline.S
index 794876fffa..c91341e8fc 100644
--- a/sysdeps/aarch64/dl-trampoline.S
+++ b/sysdeps/aarch64/dl-trampoline.S
@@ -46,6 +46,8 @@ _dl_runtime_resolve:
 	cfi_rel_offset (lr, 8)
 
 	/* Save arguments.  */
+	/* Note: Saving x9 is not required by the ABI but the assember requires
+	   the immediate values of operand 3 to be a multiple of 16 */
 	stp	x8, x9, [sp, #-(80+8*16)]!
 	cfi_adjust_cfa_offset (80+8*16)
 	cfi_rel_offset (x8, 0)
@@ -183,19 +185,23 @@ _dl_runtime_profile:
 	stp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
 	cfi_rel_offset (x6, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 0)
 	cfi_rel_offset (x7, OFFSET_RG + DL_OFFSET_RG_X0 + 16*3 + 8)
-
-	stp	d0, d1, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	cfi_rel_offset (d0, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0)
-	cfi_rel_offset (d1, OFFSET_RG + DL_OFFSET_RG_D0 + 16*0 + 8)
-	stp	d2, d3, [X29, #OFFSET_RG+ DL_OFFSET_RG_D0 + 16*1]
-	cfi_rel_offset (d2, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 0)
-	cfi_rel_offset (d3, OFFSET_RG + DL_OFFSET_RG_D0 + 16*1 + 8)
-	stp	d4, d5, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	cfi_rel_offset (d4, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 0)
-	cfi_rel_offset (d5, OFFSET_RG + DL_OFFSET_RG_D0 + 16*2 + 8)
-	stp	d6, d7, [X29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
-	cfi_rel_offset (d6, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 0)
-	cfi_rel_offset (d7, OFFSET_RG + DL_OFFSET_RG_D0 + 16*3 + 8)
+	str	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0]
+	cfi_rel_offset (x8, OFFSET_RG + DL_OFFSET_RG_X0 + 16*4 + 0)
+	/* Note X9 is in the stack frame for alignment but it is not
+	   required to be saved by the ABI */
+
+	stp	q0, q1, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	cfi_rel_offset (q0, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0)
+	cfi_rel_offset (q1, OFFSET_RG + DL_OFFSET_RG_V0 + 32*0 + 16)
+	stp	q2, q3, [X29, #OFFSET_RG+ DL_OFFSET_RG_V0 + 32*1]
+	cfi_rel_offset (q2, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 0)
+	cfi_rel_offset (q3, OFFSET_RG + DL_OFFSET_RG_V0 + 32*1 + 16)
+	stp	q4, q5, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	cfi_rel_offset (q4, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 0)
+	cfi_rel_offset (q5, OFFSET_RG + DL_OFFSET_RG_V0 + 32*2 + 16)
+	stp	q6, q7, [X29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
+	cfi_rel_offset (q6, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 0)
+	cfi_rel_offset (q7, OFFSET_RG + DL_OFFSET_RG_V0 + 32*3 + 16)
 
 	add     x0, x29, #SF_SIZE + 16
 	ldr	x1, [x29, #OFFSET_LR]
@@ -234,10 +240,11 @@ _dl_runtime_profile:
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldr	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 16*0]
+	ldp	q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
 
 	cfi_def_cfa_register (sp)
 	ldp	x29, x30, [x29, #0]
@@ -280,14 +287,21 @@ _dl_runtime_profile:
 	ldp	x2, x3, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*1]
 	ldp	x4, x5, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*2]
 	ldp	x6, x7, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*3]
-	ldp	d0, d1, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*1]
-	ldp	d4, d5, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*2]
-	ldp	d6, d7, [x29, #OFFSET_RG + DL_OFFSET_RG_D0 + 16*3]
+	ldr	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RG + DL_OFFSET_RG_V0 + 32*3]
 	blr	ip0
-	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	stp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	stp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	stp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+	stp	x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+	stp	x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+	stp	x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+	str	x8, [x29, #OFFSET_RG + DL_OFFSET_RG_X0 + 16*4]
+	stp	q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+	stp	q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+	stp	q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+	stp	q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
 
 	/* Setup call to pltexit  */
 	ldp	x0, x1, [x29, #OFFSET_SAVED_CALL_X0]
@@ -295,9 +309,16 @@ _dl_runtime_profile:
 	add	x3, x29, #OFFSET_RV
 	bl	_dl_call_pltexit
 
-	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0]
-	ldp	d0, d1, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*0]
-	ldp	d2, d3, [x29, #OFFSET_RV + DL_OFFSET_RV_D0 + 16*1]
+	ldp	x0, x1, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*0]
+	ldp	x2, x3, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*1]
+	ldp	x4, x5, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*2]
+	ldp	x6, x7, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*3]
+	ldr	x8, [x29, #OFFSET_RV + DL_OFFSET_RV_X0 + 16*4]
+	ldp	q0, q1, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*0]
+	ldp	q2, q3, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*1]
+	ldp	q4, q5, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*2]
+	ldp	q6, q7, [x29, #OFFSET_RV + DL_OFFSET_RV_V0 + 32*3]
+
 	/* LR from within La_aarch64_reg */
 	ldr	lr, [x29, #OFFSET_RG + DL_OFFSET_RG_LR]
 	cfi_restore(lr)
-- 
2.26.2


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2020-09-24 11:10 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-23  1:16 [PATCH] Fix runtime linker auditing on aarch64 Ben Woodard
2020-09-23 12:22 ` Florian Weimer
2020-09-23 12:48   ` Szabolcs Nagy
2020-09-23 12:56     ` Florian Weimer
2020-09-23 13:18       ` Szabolcs Nagy
2020-09-24 11:10         ` Florian Weimer
2020-09-23 12:34 ` Szabolcs Nagy
2020-09-23 15:10   ` Ben Coyote Woodard
2020-09-23 16:19     ` Szabolcs Nagy
2020-09-23 18:12       ` Ben Coyote Woodard
2020-09-23 18:00   ` Ben Coyote Woodard
2020-09-24  0:30 ` Carlos O'Donell
2020-09-24  3:14   ` Ben Coyote Woodard
2020-09-24  8:04     ` Szabolcs Nagy
2020-09-24  9:58     ` Florian Weimer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).