public inbox for glibc-cvs@sourceware.org
help / color / mirror / Atom feed
* [glibc/arm/morello/main] aarch64: morello: dynamic linking support
@ 2022-11-23 14:48 Szabolcs Nagy
  0 siblings, 0 replies; 3+ messages in thread
From: Szabolcs Nagy @ 2022-11-23 14:48 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=d94c03efc59ba18c71e105a5bcbfec19e0cd91d8

commit d94c03efc59ba18c71e105a5bcbfec19e0cd91d8
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Tue Mar 29 15:24:38 2022 +0100

    aarch64: morello: dynamic linking support
    
    Add morello specific dl-machine.h.
    
    Add morello dynamic relocation processing support for purecap ABI.
    Only support R_AARCH64_NONE, R_AARCH64_ABS64 and R_AARCH64_RELATIVE
    dynamic relocs from the lp64 ABI.
    
    RELATIVE and IRELATIVE relocs use a helper function from cheri-rel.h
    to construct a capability.  Also fixed the IRELATIVE handling for
    static linking.
    
    Use new machine routines on morello for load address computation so it
    is a valid capability:
    
     void *elf_machine_runtime_dynamic (void)
     void elf_machine_rtld_base_setup (struct link_map *map, void *args)
    
    The ld.so load address and RX, RW capabilities are derived from auxv
    and the RW ranges are set up based on the ld.so program headers early.
    
    __tls_get_addr should return a bounded pointer instead of fixing it in
    _dl_make_tlsdesc_dynamic, this is done in a separate patch.

Diff:
---
 sysdeps/aarch64/ldsodefs.h           |   5 +
 sysdeps/aarch64/linkmap.h            |   2 +-
 sysdeps/aarch64/morello/dl-irel.h    |  68 +++++
 sysdeps/aarch64/morello/dl-machine.h | 471 +++++++++++++++++++++++++++++++++++
 sysdeps/aarch64/morello/dl-tlsdesc.S | 229 +++++++++++++++++
 sysdeps/aarch64/morello/dl-tlsdesc.h |  62 +++++
 sysdeps/aarch64/morello/tlsdesc.sym  |  19 ++
 sysdeps/aarch64/sys/ifunc.h          |   2 +-
 8 files changed, 856 insertions(+), 2 deletions(-)

diff --git a/sysdeps/aarch64/ldsodefs.h b/sysdeps/aarch64/ldsodefs.h
index ab42b05f6c..b0b23df93c 100644
--- a/sysdeps/aarch64/ldsodefs.h
+++ b/sysdeps/aarch64/ldsodefs.h
@@ -22,6 +22,11 @@
 #include <elf.h>
 #include <cpu-features.h>
 
+#ifdef __CHERI_PURE_CAPABILITY__
+# define DO_ELF_MACHINE_REL_RELATIVE(map, l_addr, relative) \
+  elf_machine_rela_relative (map, relative)
+#endif
+
 struct La_aarch64_regs;
 struct La_aarch64_retval;
 
diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
index 1cf59dbdf8..7a7bcde4a9 100644
--- a/sysdeps/aarch64/linkmap.h
+++ b/sysdeps/aarch64/linkmap.h
@@ -20,7 +20,7 @@
 
 struct link_map_machine
 {
-  ElfW(Addr) plt;	  /* Address of .plt */
+  elfptr_t plt;		  /* Address of .plt */
   void *tlsdesc_table;	  /* Address of TLS descriptor hash table.  */
   bool bti_fail;	  /* Failed to enable Branch Target Identification.  */
 };
diff --git a/sysdeps/aarch64/morello/dl-irel.h b/sysdeps/aarch64/morello/dl-irel.h
new file mode 100644
index 0000000000..e12d29a089
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-irel.h
@@ -0,0 +1,68 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+   AArch64 Morello version.
+   Copyright (C) 2012-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <sysdep.h>
+#include <sys/ifunc.h>
+
+#define ELF_MACHINE_IRELA	1
+
+static inline uintptr_t
+__attribute ((always_inline))
+elf_ifunc_invoke (uintptr_t addr)
+{
+  __ifunc_arg_t arg;
+
+  arg._size = sizeof (arg);
+  arg._hwcap = GLRO(dl_hwcap);
+  arg._hwcap2 = GLRO(dl_hwcap2);
+  return ((uintptr_t (*) (uint64_t, const __ifunc_arg_t *)) (addr))
+	 (GLRO(dl_hwcap) | _IFUNC_ARG_HWCAP, &arg);
+}
+
+#include <cheri-rel.h>
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const ElfW(Rela) *reloc)
+{
+  const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
+
+  if (__glibc_likely (r_type == MORELLO_R(IRELATIVE)))
+    {
+      struct link_map *main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+      void *reloc_addr = (void *) dl_rw_ptr (main_map, reloc->r_offset);
+      uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+      uint64_t base = main_map->l_addr;
+      uintptr_t cap_rx = main_map->l_map_start;
+      uintptr_t cap_rw = main_map->l_rw_start;
+      uintptr_t value
+	= morello_relative (base, cap_rx, cap_rw, reloc, reloc_addr);
+      *cap_reloc_addr = elf_ifunc_invoke (value);
+    }
+  else
+    __libc_fatal ("Unexpected reloc type in static binary.\n");
+}
+
+#endif
diff --git a/sysdeps/aarch64/morello/dl-machine.h b/sysdeps/aarch64/morello/dl-machine.h
new file mode 100644
index 0000000000..11e026302c
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-machine.h
@@ -0,0 +1,471 @@
+/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef dl_machine_h
+#define dl_machine_h
+
+#define ELF_MACHINE_NAME "aarch64"
+
+#include <sysdep.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-static-tls.h>
+#include <dl-irel.h>
+#include <dl-machine-rel.h>
+#include <cpu-features.c>
+
+/* Translate a processor specific dynamic tag to the index in l_info array.  */
+#define DT_AARCH64(x) (DT_AARCH64_##x - DT_LOPROC + DT_NUM)
+
+/* Return nonzero iff ELF header is compatible with the running host.  */
+static inline int __attribute__ ((unused))
+elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
+{
+  return ehdr->e_machine == EM_AARCH64
+	 && (ehdr->e_flags & EF_AARCH64_CHERI_PURECAP) != 0;
+}
+
+/* Set up the loaded object described by L so its unrelocated PLT
+   entries will jump to the on-demand fixup code in dl-runtime.c.
+   Only acts when L has DT_JMPREL and LAZY is nonzero; returns LAZY.  */
+static inline int __attribute__ ((unused))
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+			   int lazy, int profile)
+{
+  if (l->l_info[DT_JMPREL] && lazy)
+    {
+      uintptr_t *got;
+      extern void _dl_runtime_resolve (ElfW(Word));
+      extern void _dl_runtime_profile (ElfW(Word));
+
+      got = (uintptr_t *) D_PTR (l, l_info[DT_PLTGOT]);
+      if (got[1])	/* Nonzero got[1]: keep it as l_mach.plt.  */
+	{
+	  l->l_mach.plt = dl_rx_ptr (l, got[1]);
+	}
+      got[1] = (uintptr_t) l;	/* Identify this shared object.  */
+
+      /* The got[2] entry contains the address of a function which gets
+	 called to get the address of a so far unresolved function and
+	 jump to it.  The profiling extension of the dynamic linker allows
+	 to intercept the calls to collect information.  In this case we
+	 don't store the address in the GOT so that all future calls also
+	 end in this function.  */
+      if (profile)
+	{
+	  got[2] = (uintptr_t) &_dl_runtime_profile;
+
+	  if (GLRO(dl_profile) != NULL
+	      && _dl_name_match_p (GLRO(dl_profile), l))
+	    /* Say that we really want profiling and the timers are
+	       started.  */
+	    GL(dl_profile_map) = l;
+	}
+      else
+	{
+	  /* This function will get called to fix up the GOT entry
+	     indicated by the offset on the stack, and then jump to
+	     the resolved address.  */
+	  got[2] = (uintptr_t) &_dl_runtime_resolve;
+	}
+    }
+
+  return lazy;
+}
+
+/* Runtime _DYNAMIC without dynamic relocations.  */
+static void * __attribute__ ((unused))
+elf_machine_runtime_dynamic (void)
+{
+  void *p;
+  asm (""
+    ".weak _DYNAMIC\n"
+    ".hidden _DYNAMIC\n"
+    "adrp %0, _DYNAMIC\n"
+    "add %0, %0, :lo12:_DYNAMIC\n" : "=r"(p));
+  return p;
+}
+
+/* PCC relative access to ehdr before relocations are processed.  */
+static const ElfW(Ehdr) *
+elf_machine_ehdr (void)
+{
+  const void *p;
+  asm (""
+    ".weak __ehdr_start\n"
+    ".hidden __ehdr_start\n"
+    "adrp %0, __ehdr_start\n"
+    "add %0, %0, :lo12:__ehdr_start\n" : "=r"(p));
+  return p;
+}
+
+/* Set up ld.so root capabilities, base address and RW ranges from ARGS.  */
+static void __attribute__ ((unused))
+elf_machine_rtld_base_setup (struct link_map *map, void *args)
+{
+  uintptr_t *sp;
+  long argc;
+  uintptr_t cap_rx, cap_rw, cap_exe_rx, cap_exe_rw;
+  unsigned long ldso_base = 0;
+
+  sp = args;
+  argc = sp[0];	/* ARGS starts with argc.  */
+  /* Skip argv.  */
+  sp += argc + 2;
+  /* Skip environ.  */
+  for (; *sp; sp++);
+  sp++;	/* Step over the zero entry that ended the loop above.  */
+  cap_rx = cap_rw = cap_exe_rx = cap_exe_rw = 0;
+  for (; *sp != AT_NULL; sp += 2)	/* Walk (type, value) auxv pairs.  */
+    {
+      long t = sp[0];
+      if (t == AT_BASE)
+	ldso_base = sp[1];
+      if (t == AT_CHERI_INTERP_RX_CAP)
+	cap_rx = sp[1];
+      if (t == AT_CHERI_INTERP_RW_CAP)
+	cap_rw = sp[1];
+      if (t == AT_CHERI_EXEC_RX_CAP)
+	cap_exe_rx = sp[1];
+      if (t == AT_CHERI_EXEC_RW_CAP)
+	cap_exe_rw = sp[1];
+    }
+  /* Check if ldso is the executable.  */
+  if (ldso_base == 0)
+    {
+      cap_rx = cap_exe_rx;	/* Fall back to the executable's caps.  */
+      cap_rw = cap_exe_rw;
+      ldso_base = cap_rx; /* Assume load segments start at vaddr 0.  */
+    }
+  map->l_addr = ldso_base;
+  map->l_map_start = cap_rx;
+  map->l_rw_start = cap_rw;
+
+  /* Set up the RW ranges of ld.so, required for symbolic relocations.  */
+  const ElfW(Ehdr) *ehdr = elf_machine_ehdr ();
+  const ElfW(Phdr) *phdr = (const void *) ehdr + ehdr->e_phoff;
+  if (sizeof *phdr != ehdr->e_phentsize)
+    __builtin_trap ();	/* Unexpected program header entry size.  */
+  for (const ElfW(Phdr) *ph = phdr; ph < phdr + ehdr->e_phnum; ph++)
+    if (ph->p_type == PT_LOAD && (ph->p_flags & PF_W))
+      {
+	uintptr_t allocend = map->l_addr + ph->p_vaddr + ph->p_memsz;
+	if (map->l_rw_count >= DL_MAX_RW_COUNT)
+	  __builtin_trap ();	/* No room left in l_rw_range.  */
+	map->l_rw_range[map->l_rw_count].start = map->l_addr + ph->p_vaddr;
+	map->l_rw_range[map->l_rw_count].end = allocend;
+	map->l_rw_count++;
+      }
+}
+
+/* In elf/rtld.c _dl_start should be global so dl-start.S can reference it.  */
+#define RTLD_START asm (".globl _dl_start");
+
+#define elf_machine_type_class(type)					\
+  (((type) == MORELLO_R(JUMP_SLOT)					\
+     || (type) == MORELLO_R(TPREL128)					\
+     || (type) == MORELLO_R(TLSDESC)) * ELF_RTYPE_CLASS_PLT)
+
+#define ELF_MACHINE_JMP_SLOT	MORELLO_R(JUMP_SLOT)
+
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+  if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+    /* Avoid an empty string which would disturb us.  */
+    GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+  /* init_cpu_features has been called early from __libc_start_main in
+     static executable.  */
+  init_cpu_features (&GLRO(dl_aarch64_cpu_features));
+#endif
+}
+
+
+static inline uintptr_t
+elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+		       const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
+		       const ElfW(Rela) *reloc,
+		       uintptr_t *reloc_addr,
+		       uintptr_t value)
+{
+  return *reloc_addr = value;
+}
+
+/* Return the final value of a plt relocation.  */
+static inline uintptr_t
+elf_machine_plt_value (struct link_map *map,
+		       const ElfW(Rela) *reloc,
+		       uintptr_t value)
+{
+  return value;
+}
+
+#endif
+
+/* Names of the architecture-specific auditing callback functions.  */
+#define ARCH_LA_PLTENTER aarch64_gnu_pltenter
+#define ARCH_LA_PLTEXIT  aarch64_gnu_pltexit
+
+#ifdef RESOLVE_MAP
+
+# include <cheri_perms.h>
+
+/* Apply the RELA relocation RELOC of MAP to the location RELOC_ADDR.
+   SYM/VERSION identify the symbol; SKIP_IFUNC disables resolver calls.  */
+static inline void
+__attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+		  const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+		  const struct r_found_version *version,
+		  void *const reloc_addr, int skip_ifunc)
+{
+  uint64_t *__attribute__((may_alias)) u64_reloc_addr = reloc_addr;
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+
+  if (r_type == MORELLO_R(RELATIVE))
+    *cap_reloc_addr = morello_relative (map->l_addr, map->l_map_start,
+					map->l_rw_start, reloc, reloc_addr);
+  else if (r_type == AARCH64_R(RELATIVE))
+    *u64_reloc_addr = map->l_addr + reloc->r_addend;
+  else if (__builtin_expect (r_type == R_AARCH64_NONE, 0))
+    return;
+  else
+    {
+      struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+					      r_type);
+      uintptr_t value = SYMBOL_ADDRESS (sym_map, sym, true);
+
+      if (sym != NULL
+	  && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC)
+	  && __glibc_likely (sym->st_shndx != SHN_UNDEF)
+	  && __glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+
+      switch (r_type)
+	{
+	case MORELLO_R(CAPINIT):
+	case MORELLO_R(GLOB_DAT):
+	case MORELLO_R(JUMP_SLOT):
+	{
+	  if (sym == NULL)
+	    {
+	      /* Undefined weak symbol.  */
+	      *cap_reloc_addr = value + reloc->r_addend;
+	      break;
+	    }
+
+	  unsigned long perm_mask = CAP_PERM_MASK_RX;
+	  switch (ELFW(ST_TYPE) (sym->st_info))
+	    {
+	      case STT_OBJECT:
+		perm_mask = CAP_PERM_MASK_R;
+		for (int i = 0; i < sym_map->l_rw_count; i++)
+		  if (sym_map->l_rw_range[i].start <= value
+		      && sym_map->l_rw_range[i].end > value)
+		    {
+		      value = dl_rw_ptr (sym_map, value - sym_map->l_addr);
+		      perm_mask = CAP_PERM_MASK_RW;
+		      break;
+		    }
+		value = __builtin_cheri_bounds_set_exact (value, sym->st_size);
+		break;
+	      case STT_FUNC:
+	      case STT_GNU_IFUNC:
+		/* value already has RX bounds.  */
+		break;
+	      default:
+		/* STT_NONE or unknown symbol: readonly.  */
+		perm_mask = CAP_PERM_MASK_R;
+	    }
+	  value = value + reloc->r_addend;
+	  value = __builtin_cheri_perms_and (value, perm_mask);
+
+	  /* Seal capabilities, which provide execute permission, with MORELLO_RB.  */
+	  if (perm_mask == CAP_PERM_MASK_RX)
+	    value = __builtin_cheri_seal_entry (value);
+
+	  *cap_reloc_addr = value;
+	}
+	break;
+
+# ifndef RTLD_BOOTSTRAP
+	case AARCH64_R(ABS64):
+	  *u64_reloc_addr = value + reloc->r_addend;
+	  break;
+
+	case MORELLO_R(IRELATIVE):
+	{
+	  uintptr_t value = morello_relative (map->l_addr, map->l_map_start,
+					      map->l_rw_start, reloc,
+					      reloc_addr);
+	  if (__glibc_likely (!skip_ifunc))
+	    value = elf_ifunc_invoke (value);
+	  *cap_reloc_addr = value;
+	}
+	break;
+
+	case MORELLO_R(TLSDESC):
+	{
+	  struct tlsdesc volatile *td = reloc_addr;
+	  if (! sym)
+	    {
+	      td->pair.off = reloc->r_addend;
+	      td->entry = _dl_tlsdesc_undefweak;
+	    }
+	  else
+	    {
+#  ifndef SHARED
+	      CHECK_STATIC_TLS (map, sym_map);
+#  else
+	      if (!TRY_STATIC_TLS (map, sym_map))
+		{
+		  size_t size = td->pair.size;
+		  if (size == 0)
+		    size = sym->st_size;
+		  struct tlsdesc_dynamic_arg *arg = _dl_make_tlsdesc_dynamic
+		    (sym_map, sym->st_value + reloc->r_addend);
+		  arg->tlsinfo.ti_size = size;
+		  td->arg = arg;
+		  td->entry = _dl_tlsdesc_dynamic;
+		}
+	      else
+#  endif
+		{
+		  td->pair.off = sym->st_value + sym_map->l_tls_offset
+				 + reloc->r_addend;
+		  if (td->pair.size == 0)
+		    td->pair.size = sym->st_size;
+		  td->entry = _dl_tlsdesc_return;
+		}
+	    }
+	}
+	break;
+	case MORELLO_R(TPREL128):
+	  if (sym == NULL)
+	    break;	/* Undefined weak symbol: leave the slot untouched.  */
+	  CHECK_STATIC_TLS (map, sym_map);
+	  u64_reloc_addr[0] = sym->st_value + reloc->r_addend
+			      + sym_map->l_tls_offset;
+	  if (u64_reloc_addr[1] == 0)
+	    u64_reloc_addr[1] = sym->st_size;
+	  break;
+# endif /* !RTLD_BOOTSTRAP */
+	default:
+	  _dl_reloc_bad_type (map, r_type, 0);
+	  break;
+	}
+    }
+}
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_rela_relative (struct link_map *map, const ElfW(Rela) *reloc)
+{
+  ElfW(Addr) l_addr = map->l_addr;
+  uintptr_t cap_rx = map->l_map_start;
+  uintptr_t cap_rw = map->l_rw_start;
+  void *const reloc_addr
+    = (void *) __builtin_cheri_address_set (cap_rw, l_addr + reloc->r_offset);
+  uint64_t *__attribute__((may_alias)) u64_reloc_addr = reloc_addr;
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  if (r_type == MORELLO_R(RELATIVE))
+    *cap_reloc_addr = morello_relative (l_addr, cap_rx, cap_rw,
+					reloc, reloc_addr);
+  else
+    *u64_reloc_addr = l_addr + reloc->r_addend;
+}
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+		      ElfW(Addr) l_addr,
+		      const ElfW(Rela) *reloc,
+		      int skip_ifunc)
+{
+  void *reloc_addr = (void *) dl_rw_ptr (map, reloc->r_offset);
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  /* Check for unexpected PLT reloc type.  */
+  if (__builtin_expect (r_type == MORELLO_R(JUMP_SLOT), 1))
+    {
+      if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL))
+	{
+	  /* Check the symbol table for variant PCS symbols.  */
+	  const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+	  const ElfW (Sym) *symtab =
+	    (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+	  const ElfW (Sym) *sym = &symtab[symndx];
+	  if (__glibc_unlikely (sym->st_other & STO_AARCH64_VARIANT_PCS))
+	    {
+	      /* Avoid lazy resolution of variant PCS symbols.  */
+	      const struct r_found_version *version = NULL;
+	      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+		{
+		  const ElfW (Half) *vernum =
+		    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+		  version = &map->l_versions[vernum[symndx] & 0x7fff];
+		}
+	      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+				skip_ifunc);
+	      return;
+	    }
+	}
+
+      if (map->l_mach.plt == 0)
+	*cap_reloc_addr = dl_rx_ptr (map, *cap_reloc_addr);
+      else
+	*cap_reloc_addr = map->l_mach.plt;
+    }
+  else if (__builtin_expect (r_type == MORELLO_R(TLSDESC), 1))
+    {
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
+
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+			skip_ifunc);
+    }
+  else if (__glibc_unlikely (r_type == MORELLO_R(IRELATIVE)))
+    {
+      uintptr_t value = morello_relative (map->l_addr, map->l_map_start,
+					  map->l_rw_start, reloc, reloc_addr);
+      if (__glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+      *cap_reloc_addr = value;
+    }
+  else
+    _dl_reloc_bad_type (map, r_type, 1);
+}
+
+#endif
diff --git a/sysdeps/aarch64/morello/dl-tlsdesc.S b/sysdeps/aarch64/morello/dl-tlsdesc.S
new file mode 100644
index 0000000000..6fced53734
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-tlsdesc.S
@@ -0,0 +1,229 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 Morello version.
+   Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+#define NSAVEDQREGPAIRS	16
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[csp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [csp, #32*1];			\
+	stp	 q4,  q5, [csp, #32*2];			\
+	stp	 q6,  q7, [csp, #32*3];			\
+	stp	 q8,  q9, [csp, #32*4];			\
+	stp	q10, q11, [csp, #32*5];			\
+	stp	q12, q13, [csp, #32*6];			\
+	stp	q14, q15, [csp, #32*7];			\
+	stp	q16, q17, [csp, #32*8];			\
+	stp	q18, q19, [csp, #32*9];			\
+	stp	q20, q21, [csp, #32*10];		\
+	stp	q22, q23, [csp, #32*11];		\
+	stp	q24, q25, [csp, #32*12];		\
+	stp	q26, q27, [csp, #32*13];		\
+	stp	q28, q29, [csp, #32*14];		\
+	stp	q30, q31, [csp, #32*15];
+
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [csp, #32*1];			\
+	ldp	 q4,  q5, [csp, #32*2];			\
+	ldp	 q6,  q7, [csp, #32*3];			\
+	ldp	 q8,  q9, [csp, #32*4];			\
+	ldp	q10, q11, [csp, #32*5];			\
+	ldp	q12, q13, [csp, #32*6];			\
+	ldp	q14, q15, [csp, #32*7];			\
+	ldp	q16, q17, [csp, #32*8];			\
+	ldp	q18, q19, [csp, #32*9];			\
+	ldp	q20, q21, [csp, #32*10];		\
+	ldp	q22, q23, [csp, #32*11];		\
+	ldp	q24, q25, [csp, #32*12];		\
+	ldp	q26, q27, [csp, #32*13];		\
+	ldp	q28, q29, [csp, #32*14];		\
+	ldp	q30, q31, [csp, #32*15];		\
+	ldp	 q0,  q1, [csp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
+	/* Compute the address for symbols in the static TLS block.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *tdp, void *unused, void *tp);
+	 */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	ldp	x0, x1, [c0, #PTR_SIZE] /* Load offset, size.  */
+	add	c0, c2, x0 /* c0 = tp (third argument, c2) + offset.  */
+	scbndse	c0, c0, x1 /* Bound the result to size bytes.  */
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Handler for undefined weak TLS symbols: returns NULL.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *tdp, void *unused, void *tp);
+	 */
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	mov	x0, 0
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *tdp, void *unused, void *tp);
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the address of the tls object.
+
+	   void *
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp, void *unused, void *tp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)tp + TCBHEAD_DTV);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset;
+
+	     return ___tls_get_addr (&td->tlsinfo);
+	   }
+	 */
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+	stp	c3, c4, [csp, #-32]!
+	cfi_adjust_cfa_offset (32)
+	cfi_rel_offset (c3, 0)
+	cfi_rel_offset (c4, 16)
+
+	ldr	c1, [c0,#TLSDESC_ARG]
+	ldr	c0, [c2,#TCBHEAD_DTV]
+	ldr	x3, [c1,#TLSDESC_GEN_COUNT]
+	ldr	x4, [c0,#DTV_COUNTER]
+	cmp	x3, x4
+	b.hi	2f
+	/* Load x3 = td->tlsinfo.ti_module and x4 = td->tlsinfo.ti_offset.  */
+	ldp	x3, x4, [c1,#TLSDESC_MODID]
+	lsl	x3, x3, #(PTR_LOG_SIZE+1)
+	ldr	c0, [c0, x3] /* Load val member of DTV entry.  */
+	cmp	x0, #TLS_DTV_UNALLOCATED
+	b.eq	2f
+	cfi_remember_state
+	/* Load x3 = td->tlsinfo.ti_size.  */
+	ldr	x3, [c1, #TLSDESC_SIZE]
+	add	c0, c0, x4
+	scbndse	c0, c0, x3
+1:
+	ldp	c3, c4, [csp], #32
+	cfi_adjust_cfa_offset (-32)
+	RET
+2:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the register that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+	cfi_restore_state
+
+# define NSAVEXREGPAIRS 9
+	stp	c29, c30, [csp,#-32*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (32*NSAVEXREGPAIRS)
+	cfi_rel_offset (c29, 0)
+	cfi_rel_offset (c30, 16)
+	mov	c29, csp
+	stp	 c5,  c6, [csp, #32*1]
+	stp	 c7,  c8, [csp, #32*2]
+	stp	 c9, c10, [csp, #32*3]
+	stp	c11, c12, [csp, #32*4]
+	stp	c13, c14, [csp, #32*5]
+	stp	c15, c16, [csp, #32*6]
+	stp	c17, c18, [csp, #32*7]
+	cfi_rel_offset (c5, 32*1)
+	cfi_rel_offset (c6, 32*1+16)
+	cfi_rel_offset (c7, 32*2)
+	cfi_rel_offset (c8, 32*2+16)
+	cfi_rel_offset (c9, 32*3)
+	cfi_rel_offset (c10, 32*3+16)
+	cfi_rel_offset (c11, 32*4)
+	cfi_rel_offset (c12, 32*4+16)
+	cfi_rel_offset (c13, 32*5)
+	cfi_rel_offset (c14, 32*5+16)
+	cfi_rel_offset (c15, 32*6)
+	cfi_rel_offset (c16, 32*6+16)
+	cfi_rel_offset (c17, 32*7)
+	cfi_rel_offset (c18, 32*7+16)
+
+	SAVE_Q_REGISTERS
+
+	/* TODO: remove once __tls_get_addr is fixed.  */
+	str	c1, [c29, #32*8] /* Spare frame slot; csp is the Q area now.  */
+
+	mov	c0, c1
+	bl	__tls_get_addr
+
+	/* TODO: __tls_get_addr should return bounded pointer,
+	   currently it does not so bound it here.  */
+	ldr	c1, [c29, #32*8]
+	ldr	x3, [c1, #TLSDESC_SIZE]
+	scbndse	c0, c0, x3
+
+	mrs	c2, ctpidr_el0 /* Restore c2.  */
+
+	RESTORE_Q_REGISTERS
+
+	ldp	 c5,  c6, [csp, #32*1]
+	ldp	 c7,  c8, [csp, #32*2]
+	ldp	 c9, c10, [csp, #32*3]
+	ldp	c11, c12, [csp, #32*4]
+	ldp	c13, c14, [csp, #32*5]
+	ldp	c15, c16, [csp, #32*6]
+	ldp	c17, c18, [csp, #32*7]
+
+	ldp	c29, c30, [csp], #32*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-32*NSAVEXREGPAIRS)
+	cfi_restore (c29)
+	cfi_restore (c30)
+
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+#endif
diff --git a/sysdeps/aarch64/morello/dl-tlsdesc.h b/sysdeps/aarch64/morello/dl-tlsdesc.h
new file mode 100644
index 0000000000..ced0965126
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-tlsdesc.h
@@ -0,0 +1,62 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+   Morello version.
+   Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_MORELLO_DL_TLSDESC_H
+#define _AARCH64_MORELLO_DL_TLSDESC_H 1
+
+/* Type used to represent a TLS descriptor in the GOT.  */
+struct tlsdesc
+{
+  void *(*entry) (struct tlsdesc *, void *, void *);
+  union {
+    void *arg;
+    struct { unsigned long off, size; } pair;
+  };
+};
+
+typedef struct dl_tls_index
+{
+  unsigned long int ti_module;
+  unsigned long int ti_offset;
+  unsigned long int ti_size;
+} tls_index;
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+   needs dynamic TLS offsets.  */
+struct tlsdesc_dynamic_arg
+{
+  tls_index tlsinfo;
+  size_t gen_count;
+};
+
+extern attribute_hidden void *
+_dl_tlsdesc_return (struct tlsdesc *, void *, void *);
+
+extern attribute_hidden void *
+_dl_tlsdesc_undefweak (struct tlsdesc *, void *, void *);
+
+# ifdef SHARED
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+
+extern attribute_hidden void *
+_dl_tlsdesc_dynamic (struct tlsdesc *, void *, void *);
+#endif
+
+#endif
diff --git a/sysdeps/aarch64/morello/tlsdesc.sym b/sysdeps/aarch64/morello/tlsdesc.sym
new file mode 100644
index 0000000000..adb061eb0e
--- /dev/null
+++ b/sysdeps/aarch64/morello/tlsdesc.sym
@@ -0,0 +1,19 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+TLSDESC_ARG		offsetof(struct tlsdesc, arg)
+
+TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+TLSDESC_SIZE		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_size)
+TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
+DTV_COUNTER		offsetof(dtv_t, counter)
+TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
diff --git a/sysdeps/aarch64/sys/ifunc.h b/sysdeps/aarch64/sys/ifunc.h
index 582c834e74..82e6a0a5e3 100644
--- a/sysdeps/aarch64/sys/ifunc.h
+++ b/sysdeps/aarch64/sys/ifunc.h
@@ -24,7 +24,7 @@
 
 /* The prototype of a gnu indirect function resolver on AArch64 is
 
-     ElfW(Addr) ifunc_resolver (uint64_t, const __ifunc_arg_t *);
+     elfptr_t ifunc_resolver (uint64_t, const __ifunc_arg_t *);
 
    the first argument should have the _IFUNC_ARG_HWCAP bit set and
    the remaining bits should match the AT_HWCAP settings.  */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [glibc/arm/morello/main] aarch64: morello: dynamic linking support
@ 2022-10-27 13:58 Szabolcs Nagy
  0 siblings, 0 replies; 3+ messages in thread
From: Szabolcs Nagy @ 2022-10-27 13:58 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=aa3f6a49104d9d55c952bcb73c600321aa367cf5

commit aa3f6a49104d9d55c952bcb73c600321aa367cf5
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Tue Mar 29 15:24:38 2022 +0100

    aarch64: morello: dynamic linking support
    
    Add morello specific dl-machine.h.
    
    Add morello dynamic relocation processing support for purecap ABI.
    Only support R_AARCH64_NONE, R_AARCH64_ABS64 and R_AARCH64_RELATIVE
    dynamic relocs from the lp64 ABI.
    
    RELATIVE and IRELATIVE relocs use a helper function from cheri-rel.h
    to construct a capability.  Also fixed the IRELATIVE handling for
    static linking.
    
    Use new machine routines on morello for load address computation so it
    is a valid capability:
    
     void *elf_machine_runtime_dynamic (void)
     void elf_machine_rtld_base_setup (struct link_map *map, void *args)
    
    The ld.so load address and RX, RW capabilities are derived from auxv
    and the RW ranges are set up based on the ld.so program headers early.
    
    __tls_get_addr should return a bounded pointer instead of fixing it in
    _dl_make_tlsdesc_dynamic, this is done in a separate patch.

Diff:
---
 sysdeps/aarch64/ldsodefs.h           |   5 +
 sysdeps/aarch64/linkmap.h            |   2 +-
 sysdeps/aarch64/morello/dl-irel.h    |  68 +++++
 sysdeps/aarch64/morello/dl-machine.h | 471 +++++++++++++++++++++++++++++++++++
 sysdeps/aarch64/morello/dl-tlsdesc.S | 229 +++++++++++++++++
 sysdeps/aarch64/morello/dl-tlsdesc.h |  62 +++++
 sysdeps/aarch64/morello/tlsdesc.sym  |  19 ++
 sysdeps/aarch64/sys/ifunc.h          |   2 +-
 8 files changed, 856 insertions(+), 2 deletions(-)

diff --git a/sysdeps/aarch64/ldsodefs.h b/sysdeps/aarch64/ldsodefs.h
index ab42b05f6c..b0b23df93c 100644
--- a/sysdeps/aarch64/ldsodefs.h
+++ b/sysdeps/aarch64/ldsodefs.h
@@ -22,6 +22,11 @@
 #include <elf.h>
 #include <cpu-features.h>
 
+#ifdef __CHERI_PURE_CAPABILITY__
+# define DO_ELF_MACHINE_REL_RELATIVE(map, l_addr, relative) \
+  elf_machine_rela_relative (map, relative)
+#endif
+
 struct La_aarch64_regs;
 struct La_aarch64_retval;
 
diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
index 1cf59dbdf8..7a7bcde4a9 100644
--- a/sysdeps/aarch64/linkmap.h
+++ b/sysdeps/aarch64/linkmap.h
@@ -20,7 +20,7 @@
 
 struct link_map_machine
 {
-  ElfW(Addr) plt;	  /* Address of .plt */
+  elfptr_t plt;		  /* Address of .plt */
   void *tlsdesc_table;	  /* Address of TLS descriptor hash table.  */
   bool bti_fail;	  /* Failed to enable Branch Target Identification.  */
 };
diff --git a/sysdeps/aarch64/morello/dl-irel.h b/sysdeps/aarch64/morello/dl-irel.h
new file mode 100644
index 0000000000..e12d29a089
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-irel.h
@@ -0,0 +1,68 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+   AArch64 Morello version.
+   Copyright (C) 2012-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <sysdep.h>
+#include <sys/ifunc.h>
+
+#define ELF_MACHINE_IRELA	1
+
+static inline uintptr_t
+__attribute ((always_inline))
+elf_ifunc_invoke (uintptr_t addr)
+{
+  __ifunc_arg_t arg;
+  /* Call ADDR as a resolver with (hwcap | _IFUNC_ARG_HWCAP, &arg).  */
+  arg._size = sizeof (arg);
+  arg._hwcap = GLRO(dl_hwcap);
+  arg._hwcap2 = GLRO(dl_hwcap2);
+  return ((uintptr_t (*) (uint64_t, const __ifunc_arg_t *)) (addr))
+	 (GLRO(dl_hwcap) | _IFUNC_ARG_HWCAP, &arg);
+}
+
+#include <cheri-rel.h>
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const ElfW(Rela) *reloc)
+{
+  const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
+  /* Only IRELATIVE is expected here; any other type is fatal.  */
+  if (__glibc_likely (r_type == MORELLO_R(IRELATIVE)))
+    {
+      struct link_map *main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+      void *reloc_addr = (void *) dl_rw_ptr (main_map, reloc->r_offset);
+      uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+      uint64_t base = main_map->l_addr;
+      uintptr_t cap_rx = main_map->l_map_start;
+      uintptr_t cap_rw = main_map->l_rw_start;
+      uintptr_t value
+	= morello_relative (base, cap_rx, cap_rw, reloc, reloc_addr);
+      *cap_reloc_addr = elf_ifunc_invoke (value);  /* Resolver result.  */
+    }
+  else
+    __libc_fatal ("Unexpected reloc type in static binary.\n");
+}
+
+#endif
diff --git a/sysdeps/aarch64/morello/dl-machine.h b/sysdeps/aarch64/morello/dl-machine.h
new file mode 100644
index 0000000000..11e026302c
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-machine.h
@@ -0,0 +1,471 @@
+/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef dl_machine_h
+#define dl_machine_h
+
+#define ELF_MACHINE_NAME "aarch64"
+
+#include <sysdep.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-static-tls.h>
+#include <dl-irel.h>
+#include <dl-machine-rel.h>
+#include <cpu-features.c>
+
+/* Translate a processor specific dynamic tag to the index in l_info array.  */
+#define DT_AARCH64(x) (DT_AARCH64_##x - DT_LOPROC + DT_NUM)
+
+/* Return nonzero iff EHDR is an AArch64 object with the purecap flag set.  */
+static inline int __attribute__ ((unused))
+elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
+{
+  return ehdr->e_machine == EM_AARCH64
+	 && (ehdr->e_flags & EF_AARCH64_CHERI_PURECAP) != 0;
+}
+
+/* Set up the loaded object described by L so its unrelocated PLT entries
+   jump to the on-demand fixup code in dl-runtime.c.  Returns LAZY.  */
+
+static inline int __attribute__ ((unused))
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+			   int lazy, int profile)
+{
+  if (l->l_info[DT_JMPREL] && lazy)
+    {
+      uintptr_t *got;
+      extern void _dl_runtime_resolve (ElfW(Word));
+      extern void _dl_runtime_profile (ElfW(Word));
+
+      got = (uintptr_t *) D_PTR (l, l_info[DT_PLTGOT]);
+      if (got[1])
+	{
+	  l->l_mach.plt = dl_rx_ptr (l, got[1]);
+	}
+      got[1] = (uintptr_t) l;
+      /* A nonzero old got[1] was kept (via dl_rx_ptr) in l_mach.plt.  */
+      /* The got[2] entry contains the address of a function which gets
+	 called to get the address of a so far unresolved function and
+	 jump to it.  The profiling extension of the dynamic linker allows
+	 to intercept the calls to collect information.  In this case we
+	 don't store the address in the GOT so that all future calls also
+	 end in this function.  */
+      if ( profile)
+	{
+	   got[2] = (uintptr_t) &_dl_runtime_profile;
+
+	  if (GLRO(dl_profile) != NULL
+	      && _dl_name_match_p (GLRO(dl_profile), l))
+	    /* Say that we really want profiling and the timers are
+	       started.  */
+	    GL(dl_profile_map) = l;
+	}
+      else
+	{
+	  /* This function will get called to fix up the GOT entry
+	     indicated by the offset on the stack, and then jump to
+	     the resolved address.  */
+	  got[2] = (uintptr_t) &_dl_runtime_resolve;
+	}
+    }
+
+  return lazy;
+}
+
+/* Return the address of _DYNAMIC via adrp/add, without dynamic relocs.  */
+static void * __attribute__ ((unused))
+elf_machine_runtime_dynamic (void)
+{
+  void *p;
+  asm (""
+    ".weak _DYNAMIC\n"
+    ".hidden _DYNAMIC\n"
+    "adrp %0, _DYNAMIC\n"
+    "add %0, %0, :lo12:_DYNAMIC\n" : "=r"(p));
+  return p;
+}
+
+/* Return __ehdr_start via PCC relative adrp/add; usable before relocation.  */
+static const ElfW(Ehdr) *
+elf_machine_ehdr (void)
+{
+  const void *p;
+  asm (""
+    ".weak __ehdr_start\n"
+    ".hidden __ehdr_start\n"
+    "adrp %0, __ehdr_start\n"
+    "add %0, %0, :lo12:__ehdr_start\n" : "=r"(p));
+  return p;
+}
+
+/* Set up ld.so root capabilities and base address from args.  */
+static void __attribute__ ((unused))
+elf_machine_rtld_base_setup (struct link_map *map, void *args)
+{
+  uintptr_t *sp;
+  long argc;
+  uintptr_t cap_rx, cap_rw, cap_exe_rx, cap_exe_rw;
+  unsigned long ldso_base = 0;
+  /* ARGS layout read below: argc, argv[], 0, environ[], 0, auxv pairs.  */
+  sp = args;
+  argc = sp[0];
+  /* Skip argc, argv[] and its terminator.  */
+  sp += argc + 2;
+  /* Skip environ and its terminator.  */
+  for (; *sp; sp++);
+  sp++;
+  cap_rx = cap_rw = cap_exe_rx = cap_exe_rw = 0;
+  for (; *sp != AT_NULL; sp += 2)
+    {
+      long t = sp[0];
+      if (t == AT_BASE)
+	ldso_base = sp[1];
+      if (t == AT_CHERI_INTERP_RX_CAP)
+	cap_rx = sp[1];
+      if (t == AT_CHERI_INTERP_RW_CAP)
+	cap_rw = sp[1];
+      if (t == AT_CHERI_EXEC_RX_CAP)
+	cap_exe_rx = sp[1];
+      if (t == AT_CHERI_EXEC_RW_CAP)
+	cap_exe_rw = sp[1];
+    }
+  /* AT_BASE absent or zero: ld.so is the executable, use the exec caps.  */
+  if (ldso_base == 0)
+    {
+      cap_rx = cap_exe_rx;
+      cap_rw = cap_exe_rw;
+      ldso_base = cap_rx; /* Assume load segments start at vaddr 0.  */
+    }
+  map->l_addr = ldso_base;
+  map->l_map_start = cap_rx;
+  map->l_rw_start = cap_rw;
+
+  /* Set up the RW ranges of ld.so, required for symbolic relocations.  */
+  const ElfW(Ehdr) *ehdr = elf_machine_ehdr ();
+  const ElfW(Phdr) *phdr = (const void *) ehdr + ehdr->e_phoff;
+  if (sizeof *phdr != ehdr->e_phentsize)
+    __builtin_trap ();  /* Unexpected program header entry size.  */
+  for (const ElfW(Phdr) *ph = phdr; ph < phdr + ehdr->e_phnum; ph++)
+    if (ph->p_type == PT_LOAD && (ph->p_flags & PF_W))
+      {
+	uintptr_t allocend = map->l_addr + ph->p_vaddr + ph->p_memsz;
+	if (map->l_rw_count >= DL_MAX_RW_COUNT)
+	  __builtin_trap ();  /* More writable segments than slots.  */
+	map->l_rw_range[map->l_rw_count].start = map->l_addr + ph->p_vaddr;
+	map->l_rw_range[map->l_rw_count].end = allocend;
+	map->l_rw_count++;
+      }
+}
+
+/* In elf/rtld.c _dl_start should be global so dl-start.S can reference it.  */
+#define RTLD_START asm (".globl _dl_start");
+
+#define elf_machine_type_class(type)					\
+  (((type) == MORELLO_R(JUMP_SLOT)					\
+     || (type) == MORELLO_R(TPREL128)					\
+     || (type) == MORELLO_R(TLSDESC)) * ELF_RTYPE_CLASS_PLT)
+
+#define ELF_MACHINE_JMP_SLOT	MORELLO_R(JUMP_SLOT)
+
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+  if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+    /* Treat an empty platform string as no platform at all.  */
+    GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+  /* In a static executable init_cpu_features has already been called
+     early from __libc_start_main, so only do it here for SHARED.  */
+  init_cpu_features (&GLRO(dl_aarch64_cpu_features));
+#endif
+}
+
+
+static inline uintptr_t
+elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+		       const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
+		       const ElfW(Rela) *reloc,
+		       uintptr_t *reloc_addr,
+		       uintptr_t value)
+{
+  return *reloc_addr = value;  /* Write VALUE to the slot and return it.  */
+}
+
+/* Return the final value of a plt relocation: VALUE is used unchanged.  */
+static inline uintptr_t
+elf_machine_plt_value (struct link_map *map,
+		       const ElfW(Rela) *reloc,
+		       uintptr_t value)
+{
+  return value;
+}
+
+#endif
+
+/* Names of the architecture-specific auditing callback functions.  */
+#define ARCH_LA_PLTENTER aarch64_gnu_pltenter
+#define ARCH_LA_PLTEXIT  aarch64_gnu_pltexit
+
+#ifdef RESOLVE_MAP
+
+# include <cheri_perms.h>
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+		  const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+		  const struct r_found_version *version,
+		  void *const reloc_addr, int skip_ifunc)
+{
+  uint64_t *__attribute__((may_alias)) u64_reloc_addr = reloc_addr;
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  /* Relative relocs and NONE need no symbol lookup; handle them first.  */
+  if (r_type == MORELLO_R(RELATIVE))
+    *cap_reloc_addr = morello_relative (map->l_addr, map->l_map_start,
+					map->l_rw_start, reloc, reloc_addr);
+  else if (r_type == AARCH64_R(RELATIVE))
+    *u64_reloc_addr = map->l_addr + reloc->r_addend;
+  else if (__builtin_expect (r_type == R_AARCH64_NONE, 0))
+    return;
+  else
+    {
+      struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+					      r_type);
+      uintptr_t value = SYMBOL_ADDRESS (sym_map, sym, true);
+      /* Run the resolver of a defined IFUNC symbol unless skip_ifunc.  */
+      if (sym != NULL
+	  && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC)
+	  && __glibc_likely (sym->st_shndx != SHN_UNDEF)
+	  && __glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+
+      switch (r_type)
+	{
+	case MORELLO_R(CAPINIT):
+	case MORELLO_R(GLOB_DAT):
+	case MORELLO_R(JUMP_SLOT):
+	{
+	  if (sym == NULL)
+	    {
+	      /* Undefined weak symbol.  */
+	      *cap_reloc_addr = value + reloc->r_addend;
+	      break;
+	    }
+	  /* Choose permissions, and for objects bounds, by symbol type.  */
+	  unsigned long perm_mask = CAP_PERM_MASK_RX;
+	  switch (ELFW(ST_TYPE) (sym->st_info))
+	    {
+	      case STT_OBJECT:
+		perm_mask = CAP_PERM_MASK_R;
+		for (int i = 0; i < sym_map->l_rw_count; i++)
+		  if (sym_map->l_rw_range[i].start <= value
+		      && sym_map->l_rw_range[i].end > value)
+		    {
+		      value = dl_rw_ptr (sym_map, value - sym_map->l_addr);
+		      perm_mask = CAP_PERM_MASK_RW;
+		      break;
+		    }
+		value = __builtin_cheri_bounds_set_exact (value, sym->st_size);
+		break;
+	      case STT_FUNC:
+	      case STT_GNU_IFUNC:
+		/* value already has RX bounds.  */
+		break;
+	      default:
+		/* STT_NONE or unknown symbol: readonly.  */
+		perm_mask = CAP_PERM_MASK_R;
+	    }
+	  value = value + reloc->r_addend;
+	  value = __builtin_cheri_perms_and (value, perm_mask);
+
+	  /* Seal capabilities, which provide execute permission, with MORELLO_RB.  */
+	  if (perm_mask == CAP_PERM_MASK_RX)
+	    value = __builtin_cheri_seal_entry (value);
+
+	  *cap_reloc_addr = value;
+	}
+	break;
+
+# ifndef RTLD_BOOTSTRAP
+	case AARCH64_R(ABS64):
+	  *u64_reloc_addr = value + reloc->r_addend;
+	  break;
+
+	case MORELLO_R(IRELATIVE):
+	{
+	  uintptr_t value = morello_relative (map->l_addr,
+					      map->l_map_start,
+					      map->l_rw_start,
+					      reloc,
+					      reloc_addr);
+	  if (__glibc_likely (!skip_ifunc))
+	    value = elf_ifunc_invoke (value);
+	  *cap_reloc_addr = value;
+	}
+	break;
+
+	case MORELLO_R(TLSDESC):
+	{
+	  struct tlsdesc volatile *td = reloc_addr;
+	  if (! sym)
+	    {
+	      td->pair.off = reloc->r_addend;
+	      td->entry = _dl_tlsdesc_undefweak;
+	    }
+	  else
+	    {
+#  ifndef SHARED
+	      CHECK_STATIC_TLS (map, sym_map);
+#  else
+	      if (!TRY_STATIC_TLS (map, sym_map))
+		{
+		  size_t size = td->pair.size;
+		  if (size == 0)
+		    size = sym->st_size;
+		  struct tlsdesc_dynamic_arg *arg = _dl_make_tlsdesc_dynamic
+		    (sym_map, sym->st_value + reloc->r_addend);
+		  arg->tlsinfo.ti_size = size;
+		  td->arg = arg;
+		  td->entry = _dl_tlsdesc_dynamic;
+		}
+	      else
+#  endif
+		{
+		  td->pair.off = sym->st_value + sym_map->l_tls_offset
+				 + reloc->r_addend;
+		  if (td->pair.size == 0)
+		    td->pair.size = sym->st_size;
+		  td->entry = _dl_tlsdesc_return;
+		}
+	    }
+	}
+	break;
+	case MORELLO_R(TPREL128):
+	{
+	  CHECK_STATIC_TLS (map, sym_map);
+	  u64_reloc_addr[0] = sym->st_value + reloc->r_addend
+			      + sym_map->l_tls_offset;
+	  if (u64_reloc_addr[1] == 0)
+	    u64_reloc_addr[1] = sym->st_size;
+	}
+	break;
+# endif /* !RTLD_BOOTSTRAP */
+	default:
+	  _dl_reloc_bad_type (map, r_type, 0);
+	  break;
+	}
+    }
+}
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_rela_relative (struct link_map *map, const ElfW(Rela) *reloc)
+{
+  ElfW(Addr) l_addr = map->l_addr;
+  uintptr_t cap_rx = map->l_map_start;
+  uintptr_t cap_rw = map->l_rw_start;
+  void *const reloc_addr  /* Derived from cap_rw, address of the slot.  */
+    = (void *) __builtin_cheri_address_set (cap_rw, l_addr + reloc->r_offset);
+  uint64_t *__attribute__((may_alias)) u64_reloc_addr = reloc_addr;
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  if (r_type == MORELLO_R(RELATIVE))
+    *cap_reloc_addr = morello_relative (l_addr, cap_rx, cap_rw,
+					reloc, reloc_addr);
+  else
+    *u64_reloc_addr = l_addr + reloc->r_addend;  /* 64-bit address only.  */
+}
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+		      ElfW(Addr) l_addr,
+		      const ElfW(Rela) *reloc,
+		      int skip_ifunc)
+{
+  void *reloc_addr = (void *) dl_rw_ptr (map, reloc->r_offset);
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  /* Check for unexpected PLT reloc type.  */
+  if (__builtin_expect (r_type == MORELLO_R(JUMP_SLOT), 1))
+    {
+      if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL))
+	{
+	  /* Check the symbol table for variant PCS symbols.  */
+	  const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+	  const ElfW (Sym) *symtab =
+	    (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+	  const ElfW (Sym) *sym = &symtab[symndx];
+	  if (__glibc_unlikely (sym->st_other & STO_AARCH64_VARIANT_PCS))
+	    {
+	      /* Avoid lazy resolution of variant PCS symbols.  */
+	      const struct r_found_version *version = NULL;
+	      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+		{
+		  const ElfW (Half) *vernum =
+		    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+		  version = &map->l_versions[vernum[symndx] & 0x7fff];
+		}
+	      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+				skip_ifunc);
+	      return;
+	    }
+	}
+      /* Rebase the slot's initial value, or use l_mach.plt when it is set.  */
+      if (map->l_mach.plt == 0)
+	*cap_reloc_addr = dl_rx_ptr (map, *cap_reloc_addr);
+      else
+	*cap_reloc_addr = map->l_mach.plt;
+    }
+  else if (__builtin_expect (r_type == MORELLO_R(TLSDESC), 1))
+    {
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
+
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+			skip_ifunc);
+    }
+  else if (__glibc_unlikely (r_type == MORELLO_R(IRELATIVE)))
+    {
+      uintptr_t value = morello_relative (map->l_addr, map->l_map_start,
+					  map->l_rw_start, reloc, reloc_addr);
+      if (__glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+      *cap_reloc_addr = value;
+    }
+  else
+    _dl_reloc_bad_type (map, r_type, 1);
+}
+
+#endif
diff --git a/sysdeps/aarch64/morello/dl-tlsdesc.S b/sysdeps/aarch64/morello/dl-tlsdesc.S
new file mode 100644
index 0000000000..6fced53734
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-tlsdesc.S
@@ -0,0 +1,229 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 Morello version.
+   Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+#define NSAVEDQREGPAIRS	16	/* q0-q31 saved as 16 pairs of 32 bytes.  */
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[csp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [csp, #32*1];			\
+	stp	 q4,  q5, [csp, #32*2];			\
+	stp	 q6,  q7, [csp, #32*3];			\
+	stp	 q8,  q9, [csp, #32*4];			\
+	stp	q10, q11, [csp, #32*5];			\
+	stp	q12, q13, [csp, #32*6];			\
+	stp	q14, q15, [csp, #32*7];			\
+	stp	q16, q17, [csp, #32*8];			\
+	stp	q18, q19, [csp, #32*9];			\
+	stp	q20, q21, [csp, #32*10];		\
+	stp	q22, q23, [csp, #32*11];		\
+	stp	q24, q25, [csp, #32*12];		\
+	stp	q26, q27, [csp, #32*13];		\
+	stp	q28, q29, [csp, #32*14];		\
+	stp	q30, q31, [csp, #32*15];
+/* Reload q0-q31 and pop the save area; mirrors SAVE_Q_REGISTERS.  */
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [csp, #32*1];			\
+	ldp	 q4,  q5, [csp, #32*2];			\
+	ldp	 q6,  q7, [csp, #32*3];			\
+	ldp	 q8,  q9, [csp, #32*4];			\
+	ldp	q10, q11, [csp, #32*5];			\
+	ldp	q12, q13, [csp, #32*6];			\
+	ldp	q14, q15, [csp, #32*7];			\
+	ldp	q16, q17, [csp, #32*8];			\
+	ldp	q18, q19, [csp, #32*9];			\
+	ldp	q20, q21, [csp, #32*10];		\
+	ldp	q22, q23, [csp, #32*11];		\
+	ldp	q24, q25, [csp, #32*12];		\
+	ldp	q26, q27, [csp, #32*13];		\
+	ldp	q28, q29, [csp, #32*14];		\
+	ldp	q30, q31, [csp, #32*15];		\
+	ldp	 q0,  q1, [csp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
+	/* Compute the address for symbols in the static TLS block.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *tdp, void *unused, void *tp);
+	 */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	ldp	x0, x1, [c0, #PTR_SIZE] /* Load offset, size.  */
+	add	c0, c2, x0 /* c0 = tp + offset.  */
+	scbndse	c0, c0, x1 /* Set the bounds of c0 to size.  */
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Handler for undefined weak TLS symbols: returns NULL.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *tdp, void *unused, void *tp);
+	 */
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	mov	x0, 0 /* The result is zero; the descriptor is not read.  */
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *tdp, void *unused, void *tp);
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the address of the tls object, bounded to ti_size.
+
+	   void *
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp, void *unused, void *tp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)tp + TCBHEAD_DTV);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset;
+
+	     return ___tls_get_addr (&td->tlsinfo);
+	   }
+	 */
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+	stp	c3, c4, [csp, #-32]!
+	cfi_adjust_cfa_offset (32)
+	cfi_rel_offset (c3, 0)
+	cfi_rel_offset (c4, 16)
+
+	ldr	c1, [c0,#TLSDESC_ARG]
+	ldr	c0, [c2,#TCBHEAD_DTV]
+	ldr	x3, [c1,#TLSDESC_GEN_COUNT]
+	ldr	x4, [c0,#DTV_COUNTER]
+	cmp	x3, x4
+	b.hi	2f
+	/* Load x3 = td->tlsinfo.ti_module and x4 = td->tlsinfo.ti_offset.  */
+	ldp	x3, x4, [c1,#TLSDESC_MODID]
+	lsl	x3, x3, #(PTR_LOG_SIZE+1)
+	ldr	c0, [c0, x3] /* Load val member of DTV entry.  */
+	cmp	x0, #TLS_DTV_UNALLOCATED
+	b.eq	2f
+	cfi_remember_state
+	/* Load x3 = td->tlsinfo.ti_size.  */
+	ldr	x3, [c1, #TLSDESC_SIZE]
+	add	c0, c0, x4
+	scbndse	c0, c0, x3
+1:
+	ldp	c3, c4, [csp], #32
+	cfi_adjust_cfa_offset (-32)
+	RET
+2:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the register that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.  */
+	cfi_restore_state
+
+# define NSAVEXREGPAIRS 9
+	stp	c29, c30, [csp,#-32*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (32*NSAVEXREGPAIRS)
+	cfi_rel_offset (c29, 0)
+	cfi_rel_offset (c30, 16)
+	mov	c29, csp
+	stp	 c5,  c6, [csp, #32*1]
+	stp	 c7,  c8, [csp, #32*2]
+	stp	 c9, c10, [csp, #32*3]
+	stp	c11, c12, [csp, #32*4]
+	stp	c13, c14, [csp, #32*5]
+	stp	c15, c16, [csp, #32*6]
+	stp	c17, c18, [csp, #32*7]
+	cfi_rel_offset (c5, 32*1)
+	cfi_rel_offset (c6, 32*1+16)
+	cfi_rel_offset (c7, 32*2)
+	cfi_rel_offset (c8, 32*2+16)
+	cfi_rel_offset (c9, 32*3)
+	cfi_rel_offset (c10, 32*3+16)
+	cfi_rel_offset (c11, 32*4)
+	cfi_rel_offset (c12, 32*4+16)
+	cfi_rel_offset (c13, 32*5)
+	cfi_rel_offset (c14, 32*5+16)
+	cfi_rel_offset (c15, 32*6)
+	cfi_rel_offset (c16, 32*6+16)
+	cfi_rel_offset (c17, 32*7)
+	cfi_rel_offset (c18, 32*7+16)
+
+	SAVE_Q_REGISTERS
+	/* csp moved by SAVE_Q_REGISTERS: use c29 for the spare slot.  */
+	/* TODO: remove once __tls_get_addr is fixed.  */
+	str	c1, [c29, #32*8]
+
+	mov	c0, c1
+	bl	__tls_get_addr
+
+	/* TODO: __tls_get_addr should return bounded pointer,
+	   currently it does not so bound it here.  */
+	ldr	c1, [c29, #32*8]
+	ldr	x3, [c1, #TLSDESC_SIZE]
+	scbndse	c0, c0, x3
+
+	mrs	c2, ctpidr_el0 /* Restore c2.  */
+
+	RESTORE_Q_REGISTERS
+
+	ldp	 c5,  c6, [csp, #32*1]
+	ldp	 c7,  c8, [csp, #32*2]
+	ldp	 c9, c10, [csp, #32*3]
+	ldp	c11, c12, [csp, #32*4]
+	ldp	c13, c14, [csp, #32*5]
+	ldp	c15, c16, [csp, #32*6]
+	ldp	c17, c18, [csp, #32*7]
+
+	ldp	c29, c30, [csp], #32*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-32*NSAVEXREGPAIRS)
+	cfi_restore (c29)
+	cfi_restore (c30)
+
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+#endif
diff --git a/sysdeps/aarch64/morello/dl-tlsdesc.h b/sysdeps/aarch64/morello/dl-tlsdesc.h
new file mode 100644
index 0000000000..ced0965126
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-tlsdesc.h
@@ -0,0 +1,62 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+   Morello version.
+   Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_MORELLO_DL_TLSDESC_H
+#define _AARCH64_MORELLO_DL_TLSDESC_H 1
+
+/* Type used to represent a TLS descriptor in the GOT.  */
+struct tlsdesc
+{
+  void *(*entry) (struct tlsdesc *, void *, void *);
+  union {
+    void *arg;  /* tlsdesc_dynamic_arg for _dl_tlsdesc_dynamic.  */
+    struct { unsigned long off, size; } pair;  /* For _dl_tlsdesc_return.  */
+  };
+};
+
+typedef struct dl_tls_index
+{
+  unsigned long int ti_module;
+  unsigned long int ti_offset;
+  unsigned long int ti_size;  /* Bounds length for the returned pointer.  */
+} tls_index;
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+   needs dynamic TLS offsets.  */
+struct tlsdesc_dynamic_arg
+{
+  tls_index tlsinfo;
+  size_t gen_count;  /* Compared with the dtv counter on each access.  */
+};
+
+extern attribute_hidden void *
+_dl_tlsdesc_return (struct tlsdesc *, void *, void *);
+
+extern attribute_hidden void *
+_dl_tlsdesc_undefweak (struct tlsdesc *, void *, void *);
+
+# ifdef SHARED
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+
+extern attribute_hidden void *
+_dl_tlsdesc_dynamic (struct tlsdesc *, void *, void *);
+#endif
+
+#endif
diff --git a/sysdeps/aarch64/morello/tlsdesc.sym b/sysdeps/aarch64/morello/tlsdesc.sym
new file mode 100644
index 0000000000..adb061eb0e
--- /dev/null
+++ b/sysdeps/aarch64/morello/tlsdesc.sym
@@ -0,0 +1,19 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+TLSDESC_ARG		offsetof(struct tlsdesc, arg)
+
+TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+TLSDESC_SIZE		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_size)
+TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
+DTV_COUNTER		offsetof(dtv_t, counter)
+TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
diff --git a/sysdeps/aarch64/sys/ifunc.h b/sysdeps/aarch64/sys/ifunc.h
index 582c834e74..82e6a0a5e3 100644
--- a/sysdeps/aarch64/sys/ifunc.h
+++ b/sysdeps/aarch64/sys/ifunc.h
@@ -24,7 +24,7 @@
 
 /* The prototype of a gnu indirect function resolver on AArch64 is
 
-     ElfW(Addr) ifunc_resolver (uint64_t, const __ifunc_arg_t *);
+     elfptr_t ifunc_resolver (uint64_t, const __ifunc_arg_t *);
 
    the first argument should have the _IFUNC_ARG_HWCAP bit set and
    the remaining bits should match the AT_HWCAP settings.  */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [glibc/arm/morello/main] aarch64: morello: dynamic linking support
@ 2022-10-26 15:20 Szabolcs Nagy
  0 siblings, 0 replies; 3+ messages in thread
From: Szabolcs Nagy @ 2022-10-26 15:20 UTC (permalink / raw)
  To: glibc-cvs

https://sourceware.org/git/gitweb.cgi?p=glibc.git;h=8402c6c01f706fc3ed7d4ce432c52daa465b5503

commit 8402c6c01f706fc3ed7d4ce432c52daa465b5503
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
Date:   Tue Mar 29 15:24:38 2022 +0100

    aarch64: morello: dynamic linking support
    
    Add morello specific dl-machine.h.
    
    Add morello dynamic relocation processing support for purecap ABI.
    Only support R_AARCH64_NONE, R_AARCH64_ABS64 and R_AARCH64_RELATIVE
    dynamic relocs from the lp64 ABI.
    
    RELATIVE and IRELATIVE relocs use a helper function from cheri-rel.h
    to construct a capability.  Also fixed the IRELATIVE handling for
    static linking.
    
    Use new machine routines on morello for load address computation so it
    is a valid capability:
    
     void *elf_machine_runtime_dynamic (void)
     void elf_machine_rtld_base_setup (struct link_map *map, void *args)
    
    The ld.so load address and RX, RW capabilities are derived from auxv
    and the RW ranges are set up based on the ld.so program headers early.
    
    __tls_get_addr should return a bounded pointer instead of the result
    being bounded in _dl_tlsdesc_dynamic; this is done in a separate patch.

Diff:
---
 sysdeps/aarch64/ldsodefs.h           |   5 +
 sysdeps/aarch64/linkmap.h            |   2 +-
 sysdeps/aarch64/morello/dl-irel.h    |  68 +++++
 sysdeps/aarch64/morello/dl-machine.h | 471 +++++++++++++++++++++++++++++++++++
 sysdeps/aarch64/morello/dl-tlsdesc.S | 229 +++++++++++++++++
 sysdeps/aarch64/morello/dl-tlsdesc.h |  62 +++++
 sysdeps/aarch64/morello/tlsdesc.sym  |  19 ++
 sysdeps/aarch64/sys/ifunc.h          |   2 +-
 8 files changed, 856 insertions(+), 2 deletions(-)

diff --git a/sysdeps/aarch64/ldsodefs.h b/sysdeps/aarch64/ldsodefs.h
index ab42b05f6c..b0b23df93c 100644
--- a/sysdeps/aarch64/ldsodefs.h
+++ b/sysdeps/aarch64/ldsodefs.h
@@ -22,6 +22,11 @@
 #include <elf.h>
 #include <cpu-features.h>
 
+#ifdef __CHERI_PURE_CAPABILITY__
+# define DO_ELF_MACHINE_REL_RELATIVE(map, l_addr, relative) \
+  elf_machine_rela_relative (map, relative)
+#endif
+
 struct La_aarch64_regs;
 struct La_aarch64_retval;
 
diff --git a/sysdeps/aarch64/linkmap.h b/sysdeps/aarch64/linkmap.h
index 1cf59dbdf8..7a7bcde4a9 100644
--- a/sysdeps/aarch64/linkmap.h
+++ b/sysdeps/aarch64/linkmap.h
@@ -20,7 +20,7 @@
 
 struct link_map_machine
 {
-  ElfW(Addr) plt;	  /* Address of .plt */
+  elfptr_t plt;		  /* Address of .plt */
   void *tlsdesc_table;	  /* Address of TLS descriptor hash table.  */
   bool bti_fail;	  /* Failed to enable Branch Target Identification.  */
 };
diff --git a/sysdeps/aarch64/morello/dl-irel.h b/sysdeps/aarch64/morello/dl-irel.h
new file mode 100644
index 0000000000..e12d29a089
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-irel.h
@@ -0,0 +1,68 @@
+/* Machine-dependent ELF indirect relocation inline functions.
+   AArch64 Morello version.
+   Copyright (C) 2012-2022 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _DL_IREL_H
+#define _DL_IREL_H
+
+#include <stdio.h>
+#include <unistd.h>
+#include <ldsodefs.h>
+#include <sysdep.h>
+#include <sys/ifunc.h>
+
+#define ELF_MACHINE_IRELA	1
+
+static inline uintptr_t
+__attribute ((always_inline))
+elf_ifunc_invoke (uintptr_t addr)
+{
+  __ifunc_arg_t arg;
+
+  arg._size = sizeof (arg);
+  arg._hwcap = GLRO(dl_hwcap);
+  arg._hwcap2 = GLRO(dl_hwcap2);
+  return ((uintptr_t (*) (uint64_t, const __ifunc_arg_t *)) (addr))
+	 (GLRO(dl_hwcap) | _IFUNC_ARG_HWCAP, &arg);
+}
+
+#include <cheri-rel.h>
+
+static inline void
+__attribute ((always_inline))
+elf_irela (const ElfW(Rela) *reloc)
+{
+  const unsigned long int r_type = ELFW(R_TYPE) (reloc->r_info);
+
+  if (__glibc_likely (r_type == MORELLO_R(IRELATIVE)))
+    {
+      struct link_map *main_map = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
+      void *reloc_addr = (void *) dl_rw_ptr (main_map, reloc->r_offset);
+      uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+      uint64_t base = main_map->l_addr;
+      uintptr_t cap_rx = main_map->l_map_start;
+      uintptr_t cap_rw = main_map->l_rw_start;
+      uintptr_t value
+	= morello_relative (base, cap_rx, cap_rw, reloc, reloc_addr);
+      *cap_reloc_addr = elf_ifunc_invoke (value);
+    }
+  else
+    __libc_fatal ("Unexpected reloc type in static binary.\n");
+}
+
+#endif
diff --git a/sysdeps/aarch64/morello/dl-machine.h b/sysdeps/aarch64/morello/dl-machine.h
new file mode 100644
index 0000000000..11e026302c
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-machine.h
@@ -0,0 +1,471 @@
+/* Copyright (C) 1995-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef dl_machine_h
+#define dl_machine_h
+
+#define ELF_MACHINE_NAME "aarch64"
+
+#include <sysdep.h>
+#include <tls.h>
+#include <dl-tlsdesc.h>
+#include <dl-static-tls.h>
+#include <dl-irel.h>
+#include <dl-machine-rel.h>
+#include <cpu-features.c>
+
+/* Translate a processor specific dynamic tag to the index in l_info array.  */
+#define DT_AARCH64(x) (DT_AARCH64_##x - DT_LOPROC + DT_NUM)
+
+/* Return nonzero iff ELF header is compatible with the running host.  */
+static inline int __attribute__ ((unused))
+elf_machine_matches_host (const ElfW(Ehdr) *ehdr)
+{
+  return ehdr->e_machine == EM_AARCH64
+	 && (ehdr->e_flags & EF_AARCH64_CHERI_PURECAP) != 0;
+}
+
+/* Set up the loaded object described by L so its unrelocated PLT
+   entries will jump to the on-demand fixup code in dl-runtime.c.  */
+
+static inline int __attribute__ ((unused))
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+			   int lazy, int profile)
+{
+  if (l->l_info[DT_JMPREL] && lazy)
+    {
+      uintptr_t *got;
+      extern void _dl_runtime_resolve (ElfW(Word));
+      extern void _dl_runtime_profile (ElfW(Word));
+
+      got = (uintptr_t *) D_PTR (l, l_info[DT_PLTGOT]);
+      if (got[1])
+	{
+	  l->l_mach.plt = dl_rx_ptr (l, got[1]);
+	}
+      got[1] = (uintptr_t) l;
+
+      /* The got[2] entry contains the address of a function which gets
+	 called to get the address of a so far unresolved function and
+	 jump to it.  The profiling extension of the dynamic linker allows
+	 to intercept the calls to collect information.  In this case we
+	 don't store the address in the GOT so that all future calls also
+	 end in this function.  */
+      if ( profile)
+	{
+	   got[2] = (uintptr_t) &_dl_runtime_profile;
+
+	  if (GLRO(dl_profile) != NULL
+	      && _dl_name_match_p (GLRO(dl_profile), l))
+	    /* Say that we really want profiling and the timers are
+	       started.  */
+	    GL(dl_profile_map) = l;
+	}
+      else
+	{
+	  /* This function will get called to fix up the GOT entry
+	     indicated by the offset on the stack, and then jump to
+	     the resolved address.  */
+	  got[2] = (uintptr_t) &_dl_runtime_resolve;
+	}
+    }
+
+  return lazy;
+}
+
+/* Runtime _DYNAMIC without dynamic relocations.  */
+static void * __attribute__ ((unused))
+elf_machine_runtime_dynamic (void)
+{
+  void *p;
+  asm (""
+    ".weak _DYNAMIC\n"
+    ".hidden _DYNAMIC\n"
+    "adrp %0, _DYNAMIC\n"
+    "add %0, %0, :lo12:_DYNAMIC\n" : "=r"(p));
+  return p;
+}
+
+/* PCC relative access to ehdr before relocations are processed.  */
+static const ElfW(Ehdr) *
+elf_machine_ehdr (void)
+{
+  const void *p;
+  asm (""
+    ".weak __ehdr_start\n"
+    ".hidden __ehdr_start\n"
+    "adrp %0, __ehdr_start\n"
+    "add %0, %0, :lo12:__ehdr_start\n" : "=r"(p));
+  return p;
+}
+
+/* Set up ld.so root capabilities and base address from args.  */
+static void __attribute__ ((unused))
+elf_machine_rtld_base_setup (struct link_map *map, void *args)
+{
+  uintptr_t *sp;
+  long argc;
+  uintptr_t cap_rx, cap_rw, cap_exe_rx, cap_exe_rw;
+  unsigned long ldso_base = 0;
+
+  sp = args;
+  argc = sp[0];
+  /* Skip argv.  */
+  sp += argc + 2;
+  /* Skip environ.  */
+  for (; *sp; sp++);
+  sp++;
+  cap_rx = cap_rw = cap_exe_rx = cap_exe_rw = 0;
+  for (; *sp != AT_NULL; sp += 2)
+    {
+      long t = sp[0];
+      if (t == AT_BASE)
+	ldso_base = sp[1];
+      if (t == AT_CHERI_INTERP_RX_CAP)
+	cap_rx = sp[1];
+      if (t == AT_CHERI_INTERP_RW_CAP)
+	cap_rw = sp[1];
+      if (t == AT_CHERI_EXEC_RX_CAP)
+	cap_exe_rx = sp[1];
+      if (t == AT_CHERI_EXEC_RW_CAP)
+	cap_exe_rw = sp[1];
+    }
+  /* Check if ldso is the executable.  */
+  if (ldso_base == 0)
+    {
+      cap_rx = cap_exe_rx;
+      cap_rw = cap_exe_rw;
+      ldso_base = cap_rx; /* Assume load segments start at vaddr 0.  */
+    }
+  map->l_addr = ldso_base;
+  map->l_map_start = cap_rx;
+  map->l_rw_start = cap_rw;
+
+  /* Set up the RW ranges of ld.so, required for symbolic relocations.  */
+  const ElfW(Ehdr) *ehdr = elf_machine_ehdr ();
+  const ElfW(Phdr) *phdr = (const void *) ehdr + ehdr->e_phoff;
+  if (sizeof *phdr != ehdr->e_phentsize)
+    __builtin_trap ();
+  for (const ElfW(Phdr) *ph = phdr; ph < phdr + ehdr->e_phnum; ph++)
+    if (ph->p_type == PT_LOAD && (ph->p_flags & PF_W))
+      {
+	uintptr_t allocend = map->l_addr + ph->p_vaddr + ph->p_memsz;
+	if (map->l_rw_count >= DL_MAX_RW_COUNT)
+	  __builtin_trap ();
+	map->l_rw_range[map->l_rw_count].start = map->l_addr + ph->p_vaddr;
+	map->l_rw_range[map->l_rw_count].end = allocend;
+	map->l_rw_count++;
+      }
+}
+
+/* In elf/rtld.c _dl_start should be global so dl-start.S can reference it.  */
+#define RTLD_START asm (".globl _dl_start");
+
+#define elf_machine_type_class(type)					\
+  (((type) == MORELLO_R(JUMP_SLOT)					\
+     || (type) == MORELLO_R(TPREL128)					\
+     || (type) == MORELLO_R(TLSDESC)) * ELF_RTYPE_CLASS_PLT)
+
+#define ELF_MACHINE_JMP_SLOT	MORELLO_R(JUMP_SLOT)
+
+#define DL_PLATFORM_INIT dl_platform_init ()
+
+static inline void __attribute__ ((unused))
+dl_platform_init (void)
+{
+  if (GLRO(dl_platform) != NULL && *GLRO(dl_platform) == '\0')
+    /* Avoid an empty string which would disturb us.  */
+    GLRO(dl_platform) = NULL;
+
+#ifdef SHARED
+  /* init_cpu_features has been called early from __libc_start_main in
+     static executable.  */
+  init_cpu_features (&GLRO(dl_aarch64_cpu_features));
+#endif
+}
+
+
+static inline uintptr_t
+elf_machine_fixup_plt (struct link_map *map, lookup_t t,
+		       const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
+		       const ElfW(Rela) *reloc,
+		       uintptr_t *reloc_addr,
+		       uintptr_t value)
+{
+  return *reloc_addr = value;
+}
+
+/* Return the final value of a plt relocation.  */
+static inline uintptr_t
+elf_machine_plt_value (struct link_map *map,
+		       const ElfW(Rela) *reloc,
+		       uintptr_t value)
+{
+  return value;
+}
+
+#endif
+
+/* Names of the architecture-specific auditing callback functions.  */
+#define ARCH_LA_PLTENTER aarch64_gnu_pltenter
+#define ARCH_LA_PLTEXIT  aarch64_gnu_pltexit
+
+#ifdef RESOLVE_MAP
+
+# include <cheri_perms.h>
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+		  const ElfW(Rela) *reloc, const ElfW(Sym) *sym,
+		  const struct r_found_version *version,
+		  void *const reloc_addr, int skip_ifunc)
+{
+  uint64_t *__attribute__((may_alias)) u64_reloc_addr = reloc_addr;
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+
+  if (r_type == MORELLO_R(RELATIVE))
+    *cap_reloc_addr = morello_relative (map->l_addr, map->l_map_start,
+					map->l_rw_start, reloc, reloc_addr);
+  else if (r_type == AARCH64_R(RELATIVE))
+    *u64_reloc_addr = map->l_addr + reloc->r_addend;
+  else if (__builtin_expect (r_type == R_AARCH64_NONE, 0))
+    return;
+  else
+    {
+      struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+					      r_type);
+      uintptr_t value = SYMBOL_ADDRESS (sym_map, sym, true);
+
+      if (sym != NULL
+	  && __glibc_unlikely (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC)
+	  && __glibc_likely (sym->st_shndx != SHN_UNDEF)
+	  && __glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+
+      switch (r_type)
+	{
+	case MORELLO_R(CAPINIT):
+	case MORELLO_R(GLOB_DAT):
+	case MORELLO_R(JUMP_SLOT):
+	{
+	  if (sym == NULL)
+	    {
+	      /* Undefined weak symbol.  */
+	      *cap_reloc_addr = value + reloc->r_addend;
+	      break;
+	    }
+
+	  unsigned long perm_mask = CAP_PERM_MASK_RX;
+	  switch (ELFW(ST_TYPE) (sym->st_info))
+	    {
+	      case STT_OBJECT:
+		perm_mask = CAP_PERM_MASK_R;
+		for (int i = 0; i < sym_map->l_rw_count; i++)
+		  if (sym_map->l_rw_range[i].start <= value
+		      && sym_map->l_rw_range[i].end > value)
+		    {
+		      value = dl_rw_ptr (sym_map, value - sym_map->l_addr);
+		      perm_mask = CAP_PERM_MASK_RW;
+		      break;
+		    }
+		value = __builtin_cheri_bounds_set_exact (value, sym->st_size);
+		break;
+	      case STT_FUNC:
+	      case STT_GNU_IFUNC:
+		/* value already has RX bounds.  */
+		break;
+	      default:
+		/* STT_NONE or unknown symbol: readonly.  */
+		perm_mask = CAP_PERM_MASK_R;
+	    }
+	  value = value + reloc->r_addend;
+	  value = __builtin_cheri_perms_and (value, perm_mask);
+
+	  /* Seal capabilities, which provide execute permission, with MORELLO_RB.  */
+	  if (perm_mask == CAP_PERM_MASK_RX)
+	    value = __builtin_cheri_seal_entry (value);
+
+	  *cap_reloc_addr = value;
+	}
+	break;
+
+# ifndef RTLD_BOOTSTRAP
+	case AARCH64_R(ABS64):
+	  *u64_reloc_addr = value + reloc->r_addend;
+	  break;
+
+	case MORELLO_R(IRELATIVE):
+	{
+	  uintptr_t value = morello_relative (map->l_addr,
+					      map->l_map_start,
+					      map->l_rw_start,
+					      reloc,
+					      reloc_addr);
+	  if (__glibc_likely (!skip_ifunc))
+	    value = elf_ifunc_invoke (value);
+	  *cap_reloc_addr = value;
+	}
+	break;
+
+	case MORELLO_R(TLSDESC):
+	{
+	  struct tlsdesc volatile *td = reloc_addr;
+	  if (! sym)
+	    {
+	      td->pair.off = reloc->r_addend;
+	      td->entry = _dl_tlsdesc_undefweak;
+	    }
+	  else
+	    {
+#  ifndef SHARED
+	      CHECK_STATIC_TLS (map, sym_map);
+#  else
+	      if (!TRY_STATIC_TLS (map, sym_map))
+		{
+		  size_t size = td->pair.size;
+		  if (size == 0)
+		    size = sym->st_size;
+		  struct tlsdesc_dynamic_arg *arg = _dl_make_tlsdesc_dynamic
+		    (sym_map, sym->st_value + reloc->r_addend);
+		  arg->tlsinfo.ti_size = size;
+		  td->arg = arg;
+		  td->entry = _dl_tlsdesc_dynamic;
+		}
+	      else
+#  endif
+		{
+		  td->pair.off = sym->st_value + sym_map->l_tls_offset
+				 + reloc->r_addend;
+		  if (td->pair.size == 0)
+		    td->pair.size = sym->st_size;
+		  td->entry = _dl_tlsdesc_return;
+		}
+	    }
+	}
+	break;
+	case MORELLO_R(TPREL128):
+	{
+	  CHECK_STATIC_TLS (map, sym_map);
+	  u64_reloc_addr[0] = sym->st_value + reloc->r_addend
+			      + sym_map->l_tls_offset;
+	  if (u64_reloc_addr[1] == 0)
+	    u64_reloc_addr[1] = sym->st_size;
+	}
+	break;
+# endif /* !RTLD_BOOTSTRAP */
+	default:
+	  _dl_reloc_bad_type (map, r_type, 0);
+	  break;
+	}
+    }
+}
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_rela_relative (struct link_map *map, const ElfW(Rela) *reloc)
+{
+  ElfW(Addr) l_addr = map->l_addr;
+  uintptr_t cap_rx = map->l_map_start;
+  uintptr_t cap_rw = map->l_rw_start;
+  void *const reloc_addr
+    = (void *) __builtin_cheri_address_set (cap_rw, l_addr + reloc->r_offset);
+  uint64_t *__attribute__((may_alias)) u64_reloc_addr = reloc_addr;
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  if (r_type == MORELLO_R(RELATIVE))
+    *cap_reloc_addr = morello_relative (l_addr, cap_rx, cap_rw,
+					reloc, reloc_addr);
+  else
+    *u64_reloc_addr = l_addr + reloc->r_addend;
+}
+
+static inline void
+__attribute__ ((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+		      ElfW(Addr) l_addr,
+		      const ElfW(Rela) *reloc,
+		      int skip_ifunc)
+{
+  void *reloc_addr = (void *) dl_rw_ptr (map, reloc->r_offset);
+  uintptr_t *__attribute__((may_alias)) cap_reloc_addr = reloc_addr;
+  const unsigned int r_type = ELFW (R_TYPE) (reloc->r_info);
+  /* Check for unexpected PLT reloc type.  */
+  if (__builtin_expect (r_type == MORELLO_R(JUMP_SLOT), 1))
+    {
+      if (__glibc_unlikely (map->l_info[DT_AARCH64 (VARIANT_PCS)] != NULL))
+	{
+	  /* Check the symbol table for variant PCS symbols.  */
+	  const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+	  const ElfW (Sym) *symtab =
+	    (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+	  const ElfW (Sym) *sym = &symtab[symndx];
+	  if (__glibc_unlikely (sym->st_other & STO_AARCH64_VARIANT_PCS))
+	    {
+	      /* Avoid lazy resolution of variant PCS symbols.  */
+	      const struct r_found_version *version = NULL;
+	      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+		{
+		  const ElfW (Half) *vernum =
+		    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+		  version = &map->l_versions[vernum[symndx] & 0x7fff];
+		}
+	      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+				skip_ifunc);
+	      return;
+	    }
+	}
+
+      if (map->l_mach.plt == 0)
+	*cap_reloc_addr = dl_rx_ptr (map, *cap_reloc_addr);
+      else
+	*cap_reloc_addr = map->l_mach.plt;
+    }
+  else if (__builtin_expect (r_type == MORELLO_R(TLSDESC), 1))
+    {
+      const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
+      const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
+      const ElfW (Sym) *sym = &symtab[symndx];
+      const struct r_found_version *version = NULL;
+
+      if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
+	{
+	  const ElfW (Half) *vernum =
+	    (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
+	  version = &map->l_versions[vernum[symndx] & 0x7fff];
+	}
+
+      /* Always initialize TLS descriptors completely, because lazy
+	 initialization requires synchronization at every TLS access.  */
+      elf_machine_rela (map, scope, reloc, sym, version, reloc_addr,
+			skip_ifunc);
+    }
+  else if (__glibc_unlikely (r_type == MORELLO_R(IRELATIVE)))
+    {
+      uintptr_t value = morello_relative (map->l_addr, map->l_map_start,
+					  map->l_rw_start, reloc, reloc_addr);
+      if (__glibc_likely (!skip_ifunc))
+	value = elf_ifunc_invoke (value);
+      *cap_reloc_addr = value;
+    }
+  else
+    _dl_reloc_bad_type (map, r_type, 1);
+}
+
+#endif
diff --git a/sysdeps/aarch64/morello/dl-tlsdesc.S b/sysdeps/aarch64/morello/dl-tlsdesc.S
new file mode 100644
index 0000000000..6fced53734
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-tlsdesc.S
@@ -0,0 +1,229 @@
+/* Thread-local storage handling in the ELF dynamic linker.
+   AArch64 Morello version.
+   Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <sysdep.h>
+#include <tls.h>
+#include "tlsdesc.h"
+
+#define NSAVEDQREGPAIRS	16
+#define SAVE_Q_REGISTERS				\
+	stp	q0, q1,	[csp, #-32*NSAVEDQREGPAIRS]!;	\
+	cfi_adjust_cfa_offset (32*NSAVEDQREGPAIRS);	\
+	stp	 q2,  q3, [csp, #32*1];			\
+	stp	 q4,  q5, [csp, #32*2];			\
+	stp	 q6,  q7, [csp, #32*3];			\
+	stp	 q8,  q9, [csp, #32*4];			\
+	stp	q10, q11, [csp, #32*5];			\
+	stp	q12, q13, [csp, #32*6];			\
+	stp	q14, q15, [csp, #32*7];			\
+	stp	q16, q17, [csp, #32*8];			\
+	stp	q18, q19, [csp, #32*9];			\
+	stp	q20, q21, [csp, #32*10];		\
+	stp	q22, q23, [csp, #32*11];		\
+	stp	q24, q25, [csp, #32*12];		\
+	stp	q26, q27, [csp, #32*13];		\
+	stp	q28, q29, [csp, #32*14];		\
+	stp	q30, q31, [csp, #32*15];
+
+#define RESTORE_Q_REGISTERS				\
+	ldp	 q2,  q3, [csp, #32*1];			\
+	ldp	 q4,  q5, [csp, #32*2];			\
+	ldp	 q6,  q7, [csp, #32*3];			\
+	ldp	 q8,  q9, [csp, #32*4];			\
+	ldp	q10, q11, [csp, #32*5];			\
+	ldp	q12, q13, [csp, #32*6];			\
+	ldp	q14, q15, [csp, #32*7];			\
+	ldp	q16, q17, [csp, #32*8];			\
+	ldp	q18, q19, [csp, #32*9];			\
+	ldp	q20, q21, [csp, #32*10];		\
+	ldp	q22, q23, [csp, #32*11];		\
+	ldp	q24, q25, [csp, #32*12];		\
+	ldp	q26, q27, [csp, #32*13];		\
+	ldp	q28, q29, [csp, #32*14];		\
+	ldp	q30, q31, [csp, #32*15];		\
+	ldp	 q0,  q1, [csp], #32*NSAVEDQREGPAIRS;	\
+	cfi_adjust_cfa_offset (-32*NSAVEDQREGPAIRS);
+
+	.text
+
+	/* Compute the address for symbols in the static TLS block.
+	   Prototype:
+	   _dl_tlsdesc_return (tlsdesc *tdp, void *unused, void *tp);
+	 */
+	.hidden _dl_tlsdesc_return
+	.global	_dl_tlsdesc_return
+	.type	_dl_tlsdesc_return,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_return:
+	ldp	x0, x1, [c0, #PTR_SIZE] /* Load offset, size.  */
+	add	c0, c2, x0
+	scbndse	c0, c0, x1
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_return, .-_dl_tlsdesc_return
+
+	/* Handler for undefined weak TLS symbols: returns NULL.
+	   Prototype:
+	   _dl_tlsdesc_undefweak (tlsdesc *tdp, void *unused, void *tp);
+	 */
+	.hidden _dl_tlsdesc_undefweak
+	.global	_dl_tlsdesc_undefweak
+	.type	_dl_tlsdesc_undefweak,%function
+	cfi_startproc
+	.align  2
+_dl_tlsdesc_undefweak:
+	mov	x0, 0
+	RET
+	cfi_endproc
+	.size	_dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak
+
+#ifdef SHARED
+	/* Handler for dynamic TLS symbols.
+	   Prototype:
+	   _dl_tlsdesc_dynamic (tlsdesc *tdp, void *unused, void *tp);
+
+	   The second word of the descriptor points to a
+	   tlsdesc_dynamic_arg structure.
+
+	   Returns the address of the tls object.
+
+	   void *
+	   _dl_tlsdesc_dynamic (struct tlsdesc *tdp, void *unused, void *tp)
+	   {
+	     struct tlsdesc_dynamic_arg *td = tdp->arg;
+	     dtv_t *dtv = *(dtv_t **)((char *)tp + TCBHEAD_DTV);
+	     if (__builtin_expect (td->gen_count <= dtv[0].counter
+		&& (dtv[td->tlsinfo.ti_module].pointer.val
+		    != TLS_DTV_UNALLOCATED),
+		1))
+	       return dtv[td->tlsinfo.ti_module].pointer.val
+		+ td->tlsinfo.ti_offset;
+
+	     return ___tls_get_addr (&td->tlsinfo);
+	   }
+	 */
+	.hidden _dl_tlsdesc_dynamic
+	.global	_dl_tlsdesc_dynamic
+	.type	_dl_tlsdesc_dynamic,%function
+	cfi_startproc
+	.align 2
+_dl_tlsdesc_dynamic:
+
+	/* Save just enough registers to support fast path, if we fall
+	   into slow path we will save additional registers.  */
+	stp	c3, c4, [csp, #-32]!
+	cfi_adjust_cfa_offset (32)
+	cfi_rel_offset (c3, 0)
+	cfi_rel_offset (c4, 16)
+
+	ldr	c1, [c0,#TLSDESC_ARG]
+	ldr	c0, [c2,#TCBHEAD_DTV]
+	ldr	x3, [c1,#TLSDESC_GEN_COUNT]
+	ldr	x4, [c0,#DTV_COUNTER]
+	cmp	x3, x4
+	b.hi	2f
+	/* Load x3 = td->tlsinfo.ti_module and x4 = td->tlsinfo.ti_offset.  */
+	ldp	x3, x4, [c1,#TLSDESC_MODID]
+	lsl	x3, x3, #(PTR_LOG_SIZE+1)
+	ldr	c0, [c0, x3] /* Load val member of DTV entry.  */
+	cmp	x0, #TLS_DTV_UNALLOCATED
+	b.eq	2f
+	cfi_remember_state
+	/* Load x3 = td->tlsinfo.ti_size.  */
+	ldr	x3, [c1, #TLSDESC_SIZE]
+	add	c0, c0, x4
+	scbndse	c0, c0, x3
+1:
+	ldp	c3, c4, [csp], #32
+	cfi_adjust_cfa_offset (-32)
+	RET
+2:
+	/* This is the slow path. We need to call __tls_get_addr() which
+	   means we need to save and restore all the registers that the
+	   callee will trash.  */
+
+	/* Save the remaining registers that we must treat as caller save.
+	   Each capability takes 16 bytes, so a pair's second half is at +16.  */
+	cfi_restore_state
+# define NSAVEXREGPAIRS 9
+	stp	c29, c30, [csp,#-32*NSAVEXREGPAIRS]!
+	cfi_adjust_cfa_offset (32*NSAVEXREGPAIRS)
+	cfi_rel_offset (c29, 0)
+	cfi_rel_offset (c30, 16)
+	mov	c29, csp
+	stp	 c5,  c6, [csp, #32*1]
+	stp	 c7,  c8, [csp, #32*2]
+	stp	 c9, c10, [csp, #32*3]
+	stp	c11, c12, [csp, #32*4]
+	stp	c13, c14, [csp, #32*5]
+	stp	c15, c16, [csp, #32*6]
+	stp	c17, c18, [csp, #32*7]
+	cfi_rel_offset (c5, 32*1)
+	cfi_rel_offset (c6, 32*1+16)
+	cfi_rel_offset (c7, 32*2)
+	cfi_rel_offset (c8, 32*2+16)
+	cfi_rel_offset (c9, 32*3)
+	cfi_rel_offset (c10, 32*3+16)
+	cfi_rel_offset (c11, 32*4)
+	cfi_rel_offset (c12, 32*4+16)
+	cfi_rel_offset (c13, 32*5)
+	cfi_rel_offset (c14, 32*5+16)
+	cfi_rel_offset (c15, 32*6)
+	cfi_rel_offset (c16, 32*6+16)
+	cfi_rel_offset (c17, 32*7)
+	cfi_rel_offset (c18, 32*7+16)
+
+	/* TODO: remove once __tls_get_addr is fixed.  Keep c1 in the spare
+	   ninth frame slot; after SAVE_Q_REGISTERS #32*8 is the q16 slot.  */
+	str	c1, [csp, #32*8]
+	SAVE_Q_REGISTERS
+
+	mov	c0, c1
+	bl	__tls_get_addr
+
+	mrs	c2, ctpidr_el0 /* Restore c2.  */
+
+	RESTORE_Q_REGISTERS
+
+	/* TODO: __tls_get_addr should return bounded pointer,
+	   currently it does not so bound it here.  */
+	ldr	c1, [csp, #32*8]
+	ldr	x3, [c1, #TLSDESC_SIZE]
+	scbndse	c0, c0, x3
+
+	ldp	 c5,  c6, [csp, #32*1]
+	ldp	 c7,  c8, [csp, #32*2]
+	ldp	 c9, c10, [csp, #32*3]
+	ldp	c11, c12, [csp, #32*4]
+	ldp	c13, c14, [csp, #32*5]
+	ldp	c15, c16, [csp, #32*6]
+	ldp	c17, c18, [csp, #32*7]
+
+	ldp	c29, c30, [csp], #32*NSAVEXREGPAIRS
+	cfi_adjust_cfa_offset (-32*NSAVEXREGPAIRS)
+	cfi_restore (c29)
+	cfi_restore (c30)
+
+	b	1b
+	cfi_endproc
+	.size	_dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic
+# undef NSAVEXREGPAIRS
+#endif
diff --git a/sysdeps/aarch64/morello/dl-tlsdesc.h b/sysdeps/aarch64/morello/dl-tlsdesc.h
new file mode 100644
index 0000000000..ced0965126
--- /dev/null
+++ b/sysdeps/aarch64/morello/dl-tlsdesc.h
@@ -0,0 +1,62 @@
+/* Thread-local storage descriptor handling in the ELF dynamic linker.
+   Morello version.
+   Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_MORELLO_DL_TLSDESC_H
+#define _AARCH64_MORELLO_DL_TLSDESC_H 1
+
+/* Type used to represent a TLS descriptor in the GOT.  */
+struct tlsdesc
+{
+  void *(*entry) (struct tlsdesc *, void *, void *);
+  union {
+    void *arg;
+    struct { unsigned long off, size; } pair;
+  };
+};
+
+typedef struct dl_tls_index
+{
+  unsigned long int ti_module;
+  unsigned long int ti_offset;
+  unsigned long int ti_size;
+} tls_index;
+
+/* Type used as the argument in a TLS descriptor for a symbol that
+   needs dynamic TLS offsets.  */
+struct tlsdesc_dynamic_arg
+{
+  tls_index tlsinfo;
+  size_t gen_count;
+};
+
+extern attribute_hidden void *
+_dl_tlsdesc_return (struct tlsdesc *, void *, void *);
+
+extern attribute_hidden void *
+_dl_tlsdesc_undefweak (struct tlsdesc *, void *, void *);
+
+# ifdef SHARED
+extern void *_dl_make_tlsdesc_dynamic (struct link_map *, size_t);
+
+extern attribute_hidden void *
+_dl_tlsdesc_dynamic (struct tlsdesc *, void *, void *);
+#endif
+
+#endif
diff --git a/sysdeps/aarch64/morello/tlsdesc.sym b/sysdeps/aarch64/morello/tlsdesc.sym
new file mode 100644
index 0000000000..adb061eb0e
--- /dev/null
+++ b/sysdeps/aarch64/morello/tlsdesc.sym
@@ -0,0 +1,19 @@
+#include <stddef.h>
+#include <sysdep.h>
+#include <tls.h>
+#include <link.h>
+#include <dl-tlsdesc.h>
+
+--
+
+-- Abuse tls.h macros to derive offsets relative to the thread register.
+
+TLSDESC_ARG		offsetof(struct tlsdesc, arg)
+
+TLSDESC_GEN_COUNT	offsetof(struct tlsdesc_dynamic_arg, gen_count)
+TLSDESC_MODID		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_module)
+TLSDESC_MODOFF		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_offset)
+TLSDESC_SIZE		offsetof(struct tlsdesc_dynamic_arg, tlsinfo.ti_size)
+TCBHEAD_DTV		offsetof(tcbhead_t, dtv)
+DTV_COUNTER		offsetof(dtv_t, counter)
+TLS_DTV_UNALLOCATED	TLS_DTV_UNALLOCATED
diff --git a/sysdeps/aarch64/sys/ifunc.h b/sysdeps/aarch64/sys/ifunc.h
index 582c834e74..82e6a0a5e3 100644
--- a/sysdeps/aarch64/sys/ifunc.h
+++ b/sysdeps/aarch64/sys/ifunc.h
@@ -24,7 +24,7 @@
 
 /* The prototype of a gnu indirect function resolver on AArch64 is
 
-     ElfW(Addr) ifunc_resolver (uint64_t, const __ifunc_arg_t *);
+     elfptr_t ifunc_resolver (uint64_t, const __ifunc_arg_t *);
 
    the first argument should have the _IFUNC_ARG_HWCAP bit set and
    the remaining bits should match the AT_HWCAP settings.  */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-11-23 14:48 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-23 14:48 [glibc/arm/morello/main] aarch64: morello: dynamic linking support Szabolcs Nagy
  -- strict thread matches above, loose matches on Subject: below --
2022-10-27 13:58 Szabolcs Nagy
2022-10-26 15:20 Szabolcs Nagy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).