public inbox for libc-alpha@sourceware.org
From: Siddhesh Poyarekar <siddhesh@gotplt.org>
To: Richard Earnshaw <rearnsha@arm.com>, libc-alpha@sourceware.org
Subject: Re: [PATCH v4 3/6] malloc: Basic support for memory tagging in the malloc() family
Date: Mon, 21 Dec 2020 18:57:13 +0530	[thread overview]
Message-ID: <8f82550f-b888-9a23-dba6-22f876e2323d@gotplt.org> (raw)
In-Reply-To: <20201218192957.11035-4-rearnsha@arm.com>

On 12/19/20 12:59 AM, Richard Earnshaw via Libc-alpha wrote:
> 
> This patch adds the basic support for memory tagging.
> 
> Various flavours are supported, particularly being able to turn on
> tagged memory at run-time: this allows the same code to be used on
> systems where memory tagging support is not present without needing
> a separate build of glibc.  Also, depending on whether the kernel
> supports it, the code will use mmap for the default arena if morecore
> does not, or cannot support tagged memory (on AArch64 it is not
> available).
> 
> All the hooks use function pointers to allow this to work without
> needing ifuncs.
> ---
>   malloc/arena.c              |  59 ++++++-
>   malloc/hooks.c              |  79 ++++++---
>   malloc/malloc.c             | 336 +++++++++++++++++++++++++++++-------
>   malloc/malloc.h             |   7 +
>   sysdeps/generic/libc-mtag.h |  52 ++++++
>   5 files changed, 436 insertions(+), 97 deletions(-)
>   create mode 100644 sysdeps/generic/libc-mtag.h

In summary: a nit or two below, the primary one being that the header
changes in malloc/malloc.h should probably go into include/malloc.h,
since I couldn't see a reason for those changes to be in an installed
header.

The change looks fine otherwise.
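
Roughly what I have in mind for that (an untested sketch only -- just
the same block moved verbatim into the internal wrapper header, so it
never becomes part of the installed API):

  /* include/malloc.h -- internal header, sketch only.  */
  #ifdef USE_MTAG
  extern int __mtag_mmap_flags;
  # define MTAG_MMAP_FLAGS __mtag_mmap_flags
  #else
  # define MTAG_MMAP_FLAGS 0
  #endif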

> diff --git a/malloc/arena.c b/malloc/arena.c
> index 3c9c0ecd86..ba833a2d98 100644
> --- a/malloc/arena.c
> +++ b/malloc/arena.c
> @@ -274,17 +274,52 @@ next_env_entry (char ***position)
>  #endif
>  
>  
> -#ifdef SHARED
> +#if defined(SHARED) || defined(USE_MTAG)
>  static void *
>  __failing_morecore (ptrdiff_t d)
>  {
>    return (void *) MORECORE_FAILURE;
>  }
> +#endif
>  
> +#ifdef SHARED
>  extern struct dl_open_hook *_dl_open_hook;
>  libc_hidden_proto (_dl_open_hook);
>  #endif
>  
> +#ifdef USE_MTAG
> +
> +/* Generate a new (random) tag value for PTR and tag the memory it
> +   points to up to the end of the usable size for the chunk containing
> +   it.  Return the newly tagged pointer.  */
> +static void *
> +__mtag_tag_new_usable (void *ptr)
> +{
> +  if (ptr)
> +    {
> +      mchunkptr cp = mem2chunk(ptr);
> +      /* This likely will never happen, but we can't handle retagging
> +	 chunks from the dumped main arena.  So just return the
> +	 existing pointer.  */
> +      if (DUMPED_MAIN_ARENA_CHUNK (cp))
> +	return ptr;
> +      ptr = __libc_mtag_tag_region (__libc_mtag_new_tag (ptr),
> +				    CHUNK_AVAILABLE_SIZE (cp) - CHUNK_HDR_SZ);
> +    }
> +  return ptr;
> +}
> +
> +/* Generate a new (random) tag value for PTR, set the tags for the
> +   memory to the new tag and initialize the memory contents to VAL.
> +   In practice this function will only be called with VAL=0, but we
> +   keep this parameter to maintain the same prototype as memset.  */
> +static void *
> +__mtag_tag_new_memset (void *ptr, int val, size_t size)
> +{
> +  return __libc_mtag_memset_with_tag (__libc_mtag_new_tag (ptr), val, size);
> +}
> +#endif

Ok.

> +
>  static void
>  ptmalloc_init (void)
>  {
> @@ -293,6 +328,24 @@ ptmalloc_init (void)
>  
>    __malloc_initialized = 0;
>  
> +#ifdef USE_MTAG
> +  if ((TUNABLE_GET_FULL (glibc, mem, tagging, int32_t, NULL) & 1) != 0)
> +    {
> +      /* If the environment says that we should be using tagged memory

If the *tunable* says...
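(That is the glibc.mem.tagging tunable, so at run time something like
GLIBC_TUNABLES=glibc.mem.tagging=1 -- noting it here since the comment
reads as if a dedicated environment variable were involved.)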

> +	 and that morecore does not support tagged regions, then
> +	 disable it.  */
> +      if (__MTAG_SBRK_UNTAGGED)
> +	__morecore = __failing_morecore;
> +
> +      __mtag_mmap_flags = __MTAG_MMAP_FLAGS;
> +      __tag_new_memset = __mtag_tag_new_memset;
> +      __tag_region = __libc_mtag_tag_region;
> +      __tag_new_usable = __mtag_tag_new_usable;
> +      __tag_at = __libc_mtag_address_get_tag;
> +      __mtag_granule_mask = ~(size_t)(__MTAG_GRANULE_SIZE - 1);
> +    }
> +#endif
> +
>  #ifdef SHARED
>    /* In case this libc copy is in a non-default namespace, never use
>       brk.  Likewise if dlopened from statically linked program.  The
> @@ -509,7 +562,7 @@ new_heap (size_t size, size_t top_pad)
>              }
>          }
>      }
> -  if (__mprotect (p2, size, PROT_READ | PROT_WRITE) != 0)
> +  if (__mprotect (p2, size, MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0)
>      {
>        __munmap (p2, HEAP_MAX_SIZE);
>        return 0;
> @@ -539,7 +592,7 @@ grow_heap (heap_info *h, long diff)
>      {
>        if (__mprotect ((char *) h + h->mprotect_size,
>                        (unsigned long) new_size - h->mprotect_size,
> -                      PROT_READ | PROT_WRITE) != 0)
> +                      MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE) != 0)
>          return -2;
>  
>        h->mprotect_size = new_size;
> diff --git a/malloc/hooks.c b/malloc/hooks.c
> index a2b93e5446..8a1c16dfa4 100644
> --- a/malloc/hooks.c
> +++ b/malloc/hooks.c
> @@ -63,6 +63,13 @@ __malloc_check_init (void)
>    __memalign_hook = memalign_check;
>  }
>  
> +/* When memory is tagged, the checking data is stored in the user part
> +   of the chunk.  We can't rely on the user not having modified the
> +   tags, so fetch the tag at each location before dereferencing
> +   it.  */
> +#define SAFE_CHAR_OFFSET(p,offset) \
> +  ((unsigned char *) TAG_AT (((unsigned char *) p) + offset))
> +

OK.

>  /* A simple, standard set of debugging hooks.  Overhead is `only' one
>     byte per chunk; still this will catch most cases of double frees or
>     overruns.  The goal here is to avoid obscure crashes due to invalid
> @@ -80,7 +87,6 @@ magicbyte (const void *p)
>    return magic;
>  }
>  
> -
>  /* Visualize the chunk as being partitioned into blocks of 255 bytes from the
>     highest address of the chunk, downwards.  The end of each block tells
>     us the size of that block, up to the actual size of the requested
> @@ -96,16 +102,16 @@ malloc_check_get_size (mchunkptr p)
>  
>    assert (using_malloc_checking == 1);
>  
> -  for (size = chunksize (p) - 1 + (chunk_is_mmapped (p) ? 0 : SIZE_SZ);
> -       (c = ((unsigned char *) p)[size]) != magic;
> +  for (size = CHUNK_AVAILABLE_SIZE (p) - 1;
> +       (c = *SAFE_CHAR_OFFSET (p, size)) != magic;
>         size -= c)
>      {
> -      if (c <= 0 || size < (c + 2 * SIZE_SZ))
> +      if (c <= 0 || size < (c + CHUNK_HDR_SZ))
>  	malloc_printerr ("malloc_check_get_size: memory corruption");
>      }
>  
>    /* chunk2mem size.  */
> -  return size - 2 * SIZE_SZ;
> +  return size - CHUNK_HDR_SZ;
>  }
>  
>  /* Instrument a chunk with overrun detector byte(s) and convert it
> @@ -124,9 +130,8 @@ mem2mem_check (void *ptr, size_t req_sz)
>  
>    p = mem2chunk (ptr);
>    magic = magicbyte (p);
> -  max_sz = chunksize (p) - 2 * SIZE_SZ;
> -  if (!chunk_is_mmapped (p))
> -    max_sz += SIZE_SZ;
> +  max_sz = CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ;
> +
>    for (i = max_sz - 1; i > req_sz; i -= block_sz)
>      {
>        block_sz = MIN (i - req_sz, 0xff);
> @@ -135,9 +140,9 @@ mem2mem_check (void *ptr, size_t req_sz)
>        if (block_sz == magic)
>          --block_sz;
>  
> -      m_ptr[i] = block_sz;
> +      *SAFE_CHAR_OFFSET (m_ptr, i) = block_sz;
>      }
> -  m_ptr[req_sz] = magic;
> +  *SAFE_CHAR_OFFSET (m_ptr, req_sz) = magic;
>    return (void *) m_ptr;
>  }
>  
> @@ -170,9 +175,11 @@ mem2chunk_check (void *mem, unsigned char **magic_p)
>                                 next_chunk (prev_chunk (p)) != p)))
>          return NULL;
>  
> -      for (sz += SIZE_SZ - 1; (c = ((unsigned char *) p)[sz]) != magic; sz -= c)
> +      for (sz = CHUNK_AVAILABLE_SIZE (p) - 1;
> +	   (c = *SAFE_CHAR_OFFSET (p, sz)) != magic;
> +	   sz -= c)
>          {
> -          if (c == 0 || sz < (c + 2 * SIZE_SZ))
> +          if (c == 0 || sz < (c + CHUNK_HDR_SZ))
>              return NULL;
>          }
>      }
> @@ -193,15 +200,19 @@ mem2chunk_check (void *mem, unsigned char **magic_p)
>            ((prev_size (p) + sz) & page_mask) != 0)
>          return NULL;
>  
> -      for (sz -= 1; (c = ((unsigned char *) p)[sz]) != magic; sz -= c)
> +      for (sz = CHUNK_AVAILABLE_SIZE (p) - 1;
> +	   (c = *SAFE_CHAR_OFFSET (p, sz)) != magic;
> +	   sz -= c)
>          {
> -          if (c == 0 || sz < (c + 2 * SIZE_SZ))
> +          if (c == 0 || sz < (c + CHUNK_HDR_SZ))
>              return NULL;
>          }
>      }
> -  ((unsigned char *) p)[sz] ^= 0xFF;
> +
> +  unsigned char* safe_p = SAFE_CHAR_OFFSET (p, sz);
> +  *safe_p ^= 0xFF;
>    if (magic_p)
> -    *magic_p = (unsigned char *) p + sz;
> +    *magic_p = safe_p;
>    return p;
>  }
>  
> @@ -238,7 +249,7 @@ malloc_check (size_t sz, const void *caller)
>    top_check ();
>    victim = _int_malloc (&main_arena, nb);
>    __libc_lock_unlock (main_arena.mutex);
> -  return mem2mem_check (victim, sz);
> +  return mem2mem_check (TAG_NEW_USABLE (victim), sz);
>  }
>  
>  static void
> @@ -249,6 +260,12 @@ free_check (void *mem, const void *caller)
>    if (!mem)
>      return;
>  
> +#ifdef USE_MTAG
> +  /* Quickly check that the freed pointer matches the tag for the memory.
> +     This gives a useful double-free detection.  */
> +  *(volatile char *)mem;
> +#endif
> +

OK.
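
To spell out for the archives why this catches a double free: the
first free retags the user data back to the library's colour via the
TAG_REGION call below, so a second free through the stale,
user-coloured pointer makes this volatile load fault.  A sketch,
assuming tag checking is enabled and faults are not deferred:

  char *p = malloc (32);   /* p carries the block's private colour */
  free (p);                /* memory recoloured to the library colour */
  free (p);                /* the volatile load above now traps */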

>    __libc_lock_lock (main_arena.mutex);
>    p = mem2chunk_check (mem, NULL);
>    if (!p)
> @@ -259,6 +276,8 @@ free_check (void *mem, const void *caller)
>        munmap_chunk (p);
>        return;
>      }
> +  /* Mark the chunk as belonging to the library again.  */
> +  (void)TAG_REGION (chunk2rawmem (p), CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ);
>    _int_free (&main_arena, p, 1);
>    __libc_lock_unlock (main_arena.mutex);
>  }
> @@ -266,7 +285,7 @@ free_check (void *mem, const void *caller)
>  static void *
>  realloc_check (void *oldmem, size_t bytes, const void *caller)
>  {
> -  INTERNAL_SIZE_T nb;
> +  INTERNAL_SIZE_T chnb;
>    void *newmem = 0;
>    unsigned char *magic_p;
>    size_t rb;
> @@ -284,14 +303,21 @@ realloc_check (void *oldmem, size_t bytes, const void *caller)
>        free_check (oldmem, NULL);
>        return NULL;
>      }
> +
> +#ifdef USE_MTAG
> +  /* Quickly check that the freed pointer matches the tag for the memory.
> +     This gives a useful double-free detection.  */
> +  *(volatile char *)oldmem;
> +#endif
> +
>    __libc_lock_lock (main_arena.mutex);
>    const mchunkptr oldp = mem2chunk_check (oldmem, &magic_p);
>    __libc_lock_unlock (main_arena.mutex);
>    if (!oldp)
>      malloc_printerr ("realloc(): invalid pointer");
> -  const INTERNAL_SIZE_T oldsize = chunksize (oldp);
> +  const INTERNAL_SIZE_T oldchsize = CHUNK_AVAILABLE_SIZE (oldp);
>  
> -  if (!checked_request2size (rb, &nb))
> +  if (!checked_request2size (rb, &chnb))
>      goto invert;
>  
>    __libc_lock_lock (main_arena.mutex);
> @@ -299,14 +325,13 @@ realloc_check (void *oldmem, size_t bytes, const void *caller)
>    if (chunk_is_mmapped (oldp))
>      {
>  #if HAVE_MREMAP
> -      mchunkptr newp = mremap_chunk (oldp, nb);
> +      mchunkptr newp = mremap_chunk (oldp, chnb);
>        if (newp)
>          newmem = chunk2mem (newp);
>        else
>  #endif
>        {
> -        /* Note the extra SIZE_SZ overhead. */
> -        if (oldsize - SIZE_SZ >= nb)
> +        if (oldchsize >= chnb)
>            newmem = oldmem; /* do nothing */
>          else
>            {
> @@ -315,7 +340,7 @@ realloc_check (void *oldmem, size_t bytes, const void *caller)
>  	    newmem = _int_malloc (&main_arena, rb);
>              if (newmem)
>                {
> -                memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
> +                memcpy (newmem, oldmem, oldchsize - CHUNK_HDR_SZ);
>                  munmap_chunk (oldp);
>                }
>            }
> @@ -324,7 +349,7 @@ realloc_check (void *oldmem, size_t bytes, const void *caller)
>    else
>      {
>        top_check ();
> -      newmem = _int_realloc (&main_arena, oldp, oldsize, nb);
> +      newmem = _int_realloc (&main_arena, oldp, oldchsize, chnb);
>      }
>  
>    DIAG_PUSH_NEEDS_COMMENT;
> @@ -343,7 +368,7 @@ invert:
>  
>    __libc_lock_unlock (main_arena.mutex);
>  
> -  return mem2mem_check (newmem, bytes);
> +  return mem2mem_check (TAG_NEW_USABLE (newmem), bytes);
>  }
>  
>  static void *
> @@ -385,7 +410,7 @@ memalign_check (size_t alignment, size_t bytes, const void *caller)
>    top_check ();
>    mem = _int_memalign (&main_arena, alignment, bytes + 1);
>    __libc_lock_unlock (main_arena.mutex);
> -  return mem2mem_check (mem, bytes);
> +  return mem2mem_check (TAG_NEW_USABLE (mem), bytes);
>  }
>  
>  #if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_25)
> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index 326075e704..a3e914fa8a 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -242,6 +242,9 @@
>  /* For DIAG_PUSH/POP_NEEDS_COMMENT et al.  */
>  #include <libc-diag.h>
>  
> +/* For memory tagging.  */
> +#include <libc-mtag.h>
> +
>  #include <malloc/malloc-internal.h>
>  
>  /* For SINGLE_THREAD_P.  */
> @@ -380,6 +383,96 @@ __malloc_assert (const char *assertion, const char *file, unsigned int line,
>  void * __default_morecore (ptrdiff_t);
>  void *(*__morecore)(ptrdiff_t) = __default_morecore;
>  
> +/* Memory tagging.  */
> +
> +/* Some systems support the concept of tagging (sometimes known as
> +   coloring) memory locations on a fine grained basis.  Each memory
> +   location is given a color (normally allocated randomly) and
> +   pointers are also colored.  When the pointer is dereferenced, the
> +   pointer's color is checked against the memory's color and if they
> +   differ the access is faulted (sometimes lazily).
> +
> +   We use this in glibc by maintaining a single color for the malloc
> +   data structures that are interleaved with the user data and then
> +   assigning separate colors for each block allocation handed out.  In
> +   this way simple buffer overruns will be rapidly detected.  When
> +   memory is freed, the memory is recolored back to the glibc default
> +   so that simple use-after-free errors can also be detected.
> +
> +   If memory is reallocated the buffer is recolored even if the
> +   address remains the same.  This has a performance impact, but
> +   guarantees that the old pointer cannot mistakenly be reused (code
> +   that compares old against new will see a mismatch and will then
> +   need to behave as though realloc moved the data to a new location).
> +
> +   Internal API for memory tagging support.
> +
> +   The aim is to keep the code for memory tagging support as close to
> +   the normal APIs in glibc as possible, so that if tagging is not
> +   enabled in the library, or is disabled at runtime then standard
> +   operations can continue to be used.  Support macros are used to do
> +   this:
> +
> +   void *TAG_NEW_MEMSET (void *ptr, int val, size_t size)
> +
> +   Has the same interface as memset(), but additionally allocates a
> +   new tag, colors the memory with that tag and returns a pointer that
> +   is correctly colored for that location.  The non-tagging version
> +   will simply call memset.
> +
> +   void *TAG_REGION (void *ptr, size_t size)
> +
> +   Color the region of memory pointed to by PTR and size SIZE with
> +   the color of PTR.  Returns the original pointer.
> +
> +   void *TAG_NEW_USABLE (void *ptr)
> +
> +   Allocate a new random color and use it to color the user region of
> +   a chunk; this may include data from the subsequent chunk's header
> +   if tagging is sufficiently fine grained.  Returns PTR suitably
> +   recolored for accessing the memory there.
> +
> +   void *TAG_AT (void *ptr)
> +
> +   Read the current color of the memory at the address pointed to by
> +   PTR (ignoring its current color) and return PTR recolored to that
> +   color.  PTR must be a valid address in all other respects.  When
> +   tagging is not enabled, it simply returns the original pointer.
> +*/

OK.
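
How the pieces compose in the allocation paths, as I read it
(illustrative only -- these are the patterns used in __libc_malloc and
__libc_free further down, not new code):

  void *victim = _int_malloc (av, bytes);  /* carries the library colour */
  victim = TAG_NEW_USABLE (victim);        /* new colour for the user block */
  ...
  /* And on free, hand the memory back to the library colour before
     the chunk goes onto a bin.  */
  mchunkptr p = mem2chunk (victim);
  (void) TAG_REGION (chunk2rawmem (p),
                     CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ);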

> +
> +#ifdef USE_MTAG
> +
> +/* Default implementations when memory tagging is supported, but disabled.  */
> +static void *
> +__default_tag_region (void *ptr, size_t size)
> +{
> +  return ptr;
> +}
> +
> +static void *
> +__default_tag_nop (void *ptr)
> +{
> +  return ptr;
> +}
> +
> +static int __mtag_mmap_flags = 0;
> +static size_t __mtag_granule_mask = ~(size_t)0;
> +
> +static void *(*__tag_new_memset)(void *, int, size_t) = memset;
> +static void *(*__tag_region)(void *, size_t) = __default_tag_region;
> +static void *(*__tag_new_usable)(void *) = __default_tag_nop;
> +static void *(*__tag_at)(void *) = __default_tag_nop;
> +
> +# define TAG_NEW_MEMSET(ptr, val, size) __tag_new_memset (ptr, val, size)
> +# define TAG_REGION(ptr, size) __tag_region (ptr, size)
> +# define TAG_NEW_USABLE(ptr) __tag_new_usable (ptr)
> +# define TAG_AT(ptr) __tag_at (ptr)
> +#else
> +# define TAG_NEW_MEMSET(ptr, val, size) memset (ptr, val, size)
> +# define TAG_REGION(ptr, size) (ptr)
> +# define TAG_NEW_USABLE(ptr) (ptr)
> +# define TAG_AT(ptr) (ptr)
> +#endif

Default implementations, OK.

>  
>  #include <string.h>
>  
> @@ -1187,10 +1280,31 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
>    ---------- Size and alignment checks and conversions ----------
>  */
>  
> -/* conversion from malloc headers to user pointers, and back */
> +/* Conversion from malloc headers to user pointers, and back.  When
> +   using memory tagging the user data and the malloc data structure
> +   headers have distinct tags.  Converting fully from one to the other
> +   involves extracting the tag at the other address and creating a
> +   suitable pointer using it.  That can be quite expensive.  There are
> +   many occasions, though when the pointer will not be dereferenced
> +   (for example, because we only want to assert that the pointer is
> +   correctly aligned).  In these cases it is more efficient not
> +   to extract the tag, since the answer will be the same either way.
> +   chunk2rawmem() can be used in these cases.
> + */
> +
> +/* The chunk header is two SIZE_SZ elements, but this is used widely, so
> +   we define it here for clarity later.  */
> +#define CHUNK_HDR_SZ (2 * SIZE_SZ)
> +
> +/* Convert a user mem pointer to a chunk address without correcting
> +   the tag.  */
> +#define chunk2rawmem(p) ((void*)((char*)(p) + CHUNK_HDR_SZ))
>  
> -#define chunk2mem(p)   ((void*)((char*)(p) + 2*SIZE_SZ))
> -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - 2*SIZE_SZ))
> +/* Convert between user mem pointers and chunk pointers, updating any
> +   memory tags on the pointer to respect the tag value at that
> +   location.  */
> +#define chunk2mem(p) ((void*)TAG_AT (((char*)(p) + CHUNK_HDR_SZ)))
> +#define mem2chunk(mem) ((mchunkptr)TAG_AT (((char*)(mem) - CHUNK_HDR_SZ)))
>  
>  /* The smallest possible chunk */
>  #define MIN_CHUNK_SIZE        (offsetof(struct malloc_chunk, fd_nextsize))
> @@ -1205,16 +1319,28 @@ nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
>  #define aligned_OK(m)  (((unsigned long)(m) & MALLOC_ALIGN_MASK) == 0)
>  
>  #define misaligned_chunk(p) \
> -  ((uintptr_t)(MALLOC_ALIGNMENT == 2 * SIZE_SZ ? (p) : chunk2mem (p)) \
> +  ((uintptr_t)(MALLOC_ALIGNMENT == CHUNK_HDR_SZ ? (p) : chunk2mem (p)) \
>     & MALLOC_ALIGN_MASK)
>  
>  /* pad request bytes into a usable size -- internal version */
> -
> +/* Note: This must be a macro that evaluates to a compile time constant
> +   if passed a literal constant.  */
>  #define request2size(req)                                         \
>    (((req) + SIZE_SZ + MALLOC_ALIGN_MASK < MINSIZE)  ?             \
>     MINSIZE :                                                      \
>     ((req) + SIZE_SZ + MALLOC_ALIGN_MASK) & ~MALLOC_ALIGN_MASK)
>  
> +/* Available size of chunk.  This is the size of the real usable data
> +   in the chunk, plus the chunk header.  */
> +#ifdef USE_MTAG
> +#define CHUNK_AVAILABLE_SIZE(p) \
> +  ((chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))	\
> +   & __mtag_granule_mask)
> +#else
> +#define CHUNK_AVAILABLE_SIZE(p) \
> +  (chunksize (p) + (chunk_is_mmapped (p) ? 0 : SIZE_SZ))
> +#endif
> +
>  /* Check if REQ overflows when padded and aligned and if the resulting value
>     is less than PTRDIFF_T.  Returns TRUE and the requested size or MINSIZE in
>     case the value is less than MINSIZE on SZ or false if any of the previous
> @@ -1224,6 +1350,18 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
>  {
>    if (__glibc_unlikely (req > PTRDIFF_MAX))
>      return false;
> +
> +#ifdef USE_MTAG
> +  /* When using tagged memory, we cannot share the end of the user
> +     block with the header for the next chunk, so ensure that we
> +     allocate blocks that are rounded up to the granule size.  Take
> +     care not to overflow from close to MAX_SIZE_T to a small
> +     number.  Ideally, this would be part of request2size(), but that
> +     must be a macro that produces a compile time constant if passed
> +     a constant literal.  */
> +  req = (req + ~__mtag_granule_mask) & __mtag_granule_mask;
> +#endif
> +
>    *sz = request2size (req);
>    return true;
>  }
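
To make the rounding concrete (my arithmetic, assuming a 16-byte
granule): __mtag_granule_mask is ~15, so ~__mtag_granule_mask is 15 and

  req = (20 + 15) & ~15;	/* a 20-byte request becomes 32 */

while requests already a multiple of the granule are unchanged, and the
addition cannot wrap because req <= PTRDIFF_MAX was checked just above.
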
> @@ -1322,7 +1460,6 @@ checked_request2size (size_t req, size_t *sz) __nonnull (1)
>  /* Set size at footer (only when chunk is not in use) */
>  #define set_foot(p, s)       (((mchunkptr) ((char *) (p) + (s)))->mchunk_prev_size = (s))
>  
> -
>  #pragma GCC poison mchunk_size
>  #pragma GCC poison mchunk_prev_size
>  
> @@ -1418,7 +1555,7 @@ typedef struct malloc_chunk *mbinptr;
>  #define NBINS             128
>  #define NSMALLBINS         64
>  #define SMALLBIN_WIDTH    MALLOC_ALIGNMENT
> -#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > 2 * SIZE_SZ)
> +#define SMALLBIN_CORRECTION (MALLOC_ALIGNMENT > CHUNK_HDR_SZ)
>  #define MIN_LARGE_SIZE    ((NSMALLBINS - SMALLBIN_CORRECTION) * SMALLBIN_WIDTH)
>  
>  #define in_smallbin_range(sz)  \
> @@ -1969,7 +2106,7 @@ do_check_chunk (mstate av, mchunkptr p)
>        /* chunk is page-aligned */
>        assert (((prev_size (p) + sz) & (GLRO (dl_pagesize) - 1)) == 0);
>        /* mem is aligned */
> -      assert (aligned_OK (chunk2mem (p)));
> +      assert (aligned_OK (chunk2rawmem (p)));
>      }
>  }
>  
> @@ -1993,7 +2130,7 @@ do_check_free_chunk (mstate av, mchunkptr p)
>    if ((unsigned long) (sz) >= MINSIZE)
>      {
>        assert ((sz & MALLOC_ALIGN_MASK) == 0);
> -      assert (aligned_OK (chunk2mem (p)));
> +      assert (aligned_OK (chunk2rawmem (p)));
>        /* ... matching footer field */
>        assert (prev_size (next_chunk (p)) == sz);
>        /* ... and is fully consolidated */
> @@ -2072,7 +2209,7 @@ do_check_remalloced_chunk (mstate av, mchunkptr p, INTERNAL_SIZE_T s)
>    assert ((sz & MALLOC_ALIGN_MASK) == 0);
>    assert ((unsigned long) (sz) >= MINSIZE);
>    /* ... and alignment */
> -  assert (aligned_OK (chunk2mem (p)));
> +  assert (aligned_OK (chunk2rawmem (p)));
>    /* chunk is less than MINSIZE more than request */
>    assert ((long) (sz) - (long) (s) >= 0);
>    assert ((long) (sz) - (long) (s + MINSIZE) < 0);
> @@ -2318,7 +2455,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>           See the front_misalign handling below, for glibc there is no
>           need for further alignments unless we have have high alignment.
>         */
> -      if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
> +      if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
>          size = ALIGN_UP (nb + SIZE_SZ, pagesize);
>        else
>          size = ALIGN_UP (nb + SIZE_SZ + MALLOC_ALIGN_MASK, pagesize);
> @@ -2327,7 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>        /* Don't try if size wraps around 0 */
>        if ((unsigned long) (size) > (unsigned long) (nb))
>          {
> -          mm = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
> +          mm = (char *) (MMAP (0, size,
> +			       MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE, 0));
>  
>            if (mm != MAP_FAILED)
>              {
> @@ -2339,16 +2477,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>                   address argument for later munmap in free() and realloc().
>                 */
>  
> -              if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
> +              if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
>                  {
> -                  /* For glibc, chunk2mem increases the address by 2*SIZE_SZ and
> -                     MALLOC_ALIGN_MASK is 2*SIZE_SZ-1.  Each mmap'ed area is page
> -                     aligned and therefore definitely MALLOC_ALIGN_MASK-aligned.  */
> -                  assert (((INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK) == 0);
> +                  /* For glibc, chunk2rawmem increases the address by
> +                     CHUNK_HDR_SZ and MALLOC_ALIGN_MASK is
> +                     CHUNK_HDR_SZ-1.  Each mmap'ed area is page
> +                     aligned and therefore definitely
> +                     MALLOC_ALIGN_MASK-aligned.  */
> +                  assert (((INTERNAL_SIZE_T) chunk2rawmem (mm) & MALLOC_ALIGN_MASK) == 0);
>                    front_misalign = 0;
>                  }
>                else
> -                front_misalign = (INTERNAL_SIZE_T) chunk2mem (mm) & MALLOC_ALIGN_MASK;
> +                front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (mm) & MALLOC_ALIGN_MASK;
>                if (front_misalign > 0)
>                  {
>                    correction = MALLOC_ALIGNMENT - front_misalign;
> @@ -2436,18 +2576,20 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>               become the top chunk again later.  Note that a footer is set
>               up, too, although the chunk is marked in use. */
>            old_size = (old_size - MINSIZE) & ~MALLOC_ALIGN_MASK;
> -          set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ), 0 | PREV_INUSE);
> +          set_head (chunk_at_offset (old_top, old_size + CHUNK_HDR_SZ),
> +		    0 | PREV_INUSE);
>            if (old_size >= MINSIZE)
>              {
> -              set_head (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ) | PREV_INUSE);
> -              set_foot (chunk_at_offset (old_top, old_size), (2 * SIZE_SZ));
> +              set_head (chunk_at_offset (old_top, old_size),
> +			CHUNK_HDR_SZ | PREV_INUSE);
> +              set_foot (chunk_at_offset (old_top, old_size), CHUNK_HDR_SZ);
>                set_head (old_top, old_size | PREV_INUSE | NON_MAIN_ARENA);
>                _int_free (av, old_top, 1);
>              }
>            else
>              {
> -              set_head (old_top, (old_size + 2 * SIZE_SZ) | PREV_INUSE);
> -              set_foot (old_top, (old_size + 2 * SIZE_SZ));
> +              set_head (old_top, (old_size + CHUNK_HDR_SZ) | PREV_INUSE);
> +              set_foot (old_top, (old_size + CHUNK_HDR_SZ));
>              }
>          }
>        else if (!tried_mmap)
> @@ -2520,7 +2662,9 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>            /* Don't try if size wraps around 0 */
>            if ((unsigned long) (size) > (unsigned long) (nb))
>              {
> -              char *mbrk = (char *) (MMAP (0, size, PROT_READ | PROT_WRITE, 0));
> +              char *mbrk = (char *) (MMAP (0, size,
> +					   MTAG_MMAP_FLAGS | PROT_READ | PROT_WRITE,
> +					   0));
>  
>                if (mbrk != MAP_FAILED)
>                  {
> @@ -2591,7 +2735,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>  
>                    /* Guarantee alignment of first new chunk made from this space */
>  
> -                  front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
> +                  front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (brk) & MALLOC_ALIGN_MASK;
>                    if (front_misalign > 0)
>                      {
>                        /*
> @@ -2647,12 +2791,12 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>                /* handle non-contiguous cases */
>                else
>                  {
> -                  if (MALLOC_ALIGNMENT == 2 * SIZE_SZ)
> +                  if (MALLOC_ALIGNMENT == CHUNK_HDR_SZ)
>                      /* MORECORE/mmap must correctly align */
> -                    assert (((unsigned long) chunk2mem (brk) & MALLOC_ALIGN_MASK) == 0);
> +                    assert (((unsigned long) chunk2rawmem (brk) & MALLOC_ALIGN_MASK) == 0);
>                    else
>                      {
> -                      front_misalign = (INTERNAL_SIZE_T) chunk2mem (brk) & MALLOC_ALIGN_MASK;
> +                      front_misalign = (INTERNAL_SIZE_T) chunk2rawmem (brk) & MALLOC_ALIGN_MASK;
>                        if (front_misalign > 0)
>                          {
>                            /*
> @@ -2697,7 +2841,7 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>                           multiple of MALLOC_ALIGNMENT. We know there is at least
>                           enough space in old_top to do this.
>                         */
> -                      old_size = (old_size - 4 * SIZE_SZ) & ~MALLOC_ALIGN_MASK;
> +                      old_size = (old_size - 2 * CHUNK_HDR_SZ) & ~MALLOC_ALIGN_MASK;
>                        set_head (old_top, old_size | PREV_INUSE);
>  
>                        /*
> @@ -2707,9 +2851,10 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>                           lost.
>                         */
>  		      set_head (chunk_at_offset (old_top, old_size),
> -				(2 * SIZE_SZ) | PREV_INUSE);
> -		      set_head (chunk_at_offset (old_top, old_size + 2 * SIZE_SZ),
> -				(2 * SIZE_SZ) | PREV_INUSE);
> +				CHUNK_HDR_SZ | PREV_INUSE);
> +		      set_head (chunk_at_offset (old_top,
> +						 old_size + CHUNK_HDR_SZ),
> +				CHUNK_HDR_SZ | PREV_INUSE);
>  
>                        /* If possible, release the rest. */
>                        if (old_size >= MINSIZE)
> @@ -2837,7 +2982,7 @@ munmap_chunk (mchunkptr p)
>    if (DUMPED_MAIN_ARENA_CHUNK (p))
>      return;
>  
> -  uintptr_t mem = (uintptr_t) chunk2mem (p);
> +  uintptr_t mem = (uintptr_t) chunk2rawmem (p);
>    uintptr_t block = (uintptr_t) p - prev_size (p);
>    size_t total_size = prev_size (p) + size;
>    /* Unfortunately we have to do the compilers job by hand here.  Normally
> @@ -2892,7 +3037,7 @@ mremap_chunk (mchunkptr p, size_t new_size)
>  
>    p = (mchunkptr) (cp + offset);
>  
> -  assert (aligned_OK (chunk2mem (p)));
> +  assert (aligned_OK (chunk2rawmem (p)));
>  
>    assert (prev_size (p) == offset);
>    set_head (p, (new_size - offset) | IS_MMAPPED);
> @@ -3073,14 +3218,15 @@ __libc_malloc (size_t bytes)
>        && tcache
>        && tcache->counts[tc_idx] > 0)
>      {
> -      return tcache_get (tc_idx);
> +      victim = tcache_get (tc_idx);
> +      return TAG_NEW_USABLE (victim);
>      }
>    DIAG_POP_NEEDS_COMMENT;
>  #endif
>  
>    if (SINGLE_THREAD_P)
>      {
> -      victim = _int_malloc (&main_arena, bytes);
> +      victim = TAG_NEW_USABLE (_int_malloc (&main_arena, bytes));
>        assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
>  	      &main_arena == arena_for_chunk (mem2chunk (victim)));
>        return victim;
> @@ -3101,6 +3247,8 @@ __libc_malloc (size_t bytes)
>    if (ar_ptr != NULL)
>      __libc_lock_unlock (ar_ptr->mutex);
>  
> +  victim = TAG_NEW_USABLE (victim);
> +
>    assert (!victim || chunk_is_mmapped (mem2chunk (victim)) ||
>            ar_ptr == arena_for_chunk (mem2chunk (victim)));
>    return victim;
> @@ -3124,8 +3272,17 @@ __libc_free (void *mem)
>    if (mem == 0)                              /* free(0) has no effect */
>      return;
>  
> +#ifdef USE_MTAG
> +  /* Quickly check that the freed pointer matches the tag for the memory.
> +     This gives a useful double-free detection.  */
> +  *(volatile char *)mem;
> +#endif
> +
>    p = mem2chunk (mem);
>  
> +  /* Mark the chunk as belonging to the library again.  */
> +  (void)TAG_REGION (chunk2rawmem (p), CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ);
> +

Tagging mem with chunk colour.  OK.

>    if (chunk_is_mmapped (p))                       /* release mmapped memory. */
>      {
>        /* See if the dynamic brk/mmap threshold needs adjusting.
> @@ -3175,6 +3332,12 @@ __libc_realloc (void *oldmem, size_t bytes)
>    if (oldmem == 0)
>      return __libc_malloc (bytes);
>  
> +#ifdef USE_MTAG
> +  /* Perform a quick check to ensure that the pointer's tag matches the
> +     memory's tag.  */
> +  *(volatile char*) oldmem;
> +#endif
> +
>    /* chunk corresponding to oldmem */
>    const mchunkptr oldp = mem2chunk (oldmem);
>    /* its size */
> @@ -3217,7 +3380,7 @@ __libc_realloc (void *oldmem, size_t bytes)
>  	    return NULL;
>  	  /* Copy as many bytes as are available from the old chunk
>  	     and fit into the new size.  NB: The overhead for faked
> -	     mmapped chunks is only SIZE_SZ, not 2 * SIZE_SZ as for
> +	     mmapped chunks is only SIZE_SZ, not CHUNK_HDR_SZ as for
>  	     regular mmapped chunks.  */
>  	  if (bytes > oldsize - SIZE_SZ)
>  	    bytes = oldsize - SIZE_SZ;
> @@ -3230,7 +3393,15 @@ __libc_realloc (void *oldmem, size_t bytes)
>  #if HAVE_MREMAP
>        newp = mremap_chunk (oldp, nb);
>        if (newp)
> -        return chunk2mem (newp);
> +	{
> +	  void *newmem = chunk2rawmem (newp);
> +	  /* Give the new block a different tag.  This helps to ensure
> +	     that stale handles to the previous mapping are not
> +	     reused.  There's a performance hit for both us and the
> +	     caller for doing this, so we might want to
> +	     reconsider.  */
> +	  return TAG_NEW_USABLE (newmem);
> +	}
>  #endif
>        /* Note the extra SIZE_SZ overhead. */
>        if (oldsize - SIZE_SZ >= nb)
> @@ -3241,7 +3412,7 @@ __libc_realloc (void *oldmem, size_t bytes)
>        if (newmem == 0)
>          return 0;              /* propagate failure */
>  
> -      memcpy (newmem, oldmem, oldsize - 2 * SIZE_SZ);
> +      memcpy (newmem, oldmem, oldsize - CHUNK_HDR_SZ);
>        munmap_chunk (oldp);
>        return newmem;
>      }
> @@ -3328,8 +3499,7 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
>        p = _int_memalign (&main_arena, alignment, bytes);
>        assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
>  	      &main_arena == arena_for_chunk (mem2chunk (p)));
> -
> -      return p;
> +      return TAG_NEW_USABLE (p);
>      }
>  
>    arena_get (ar_ptr, bytes + alignment + MINSIZE);
> @@ -3347,7 +3517,7 @@ _mid_memalign (size_t alignment, size_t bytes, void *address)
>  
>    assert (!p || chunk_is_mmapped (mem2chunk (p)) ||
>            ar_ptr == arena_for_chunk (mem2chunk (p)));
> -  return p;
> +  return TAG_NEW_USABLE (p);
>  }
>  /* For ISO C11.  */
>  weak_alias (__libc_memalign, aligned_alloc)
> @@ -3356,17 +3526,22 @@ libc_hidden_def (__libc_memalign)
>  void *
>  __libc_valloc (size_t bytes)
>  {
> +  void *p;
> +
>    if (__malloc_initialized < 0)
>      ptmalloc_init ();
>  
>    void *address = RETURN_ADDRESS (0);
>    size_t pagesize = GLRO (dl_pagesize);
> -  return _mid_memalign (pagesize, bytes, address);
> +  p = _mid_memalign (pagesize, bytes, address);
> +  return TAG_NEW_USABLE (p);
>  }
>  
>  void *
>  __libc_pvalloc (size_t bytes)
>  {
> +  void *p;
> +
>    if (__malloc_initialized < 0)
>      ptmalloc_init ();
>  
> @@ -3383,19 +3558,22 @@ __libc_pvalloc (size_t bytes)
>      }
>    rounded_bytes = rounded_bytes & -(pagesize - 1);
>  
> -  return _mid_memalign (pagesize, rounded_bytes, address);
> +  p = _mid_memalign (pagesize, rounded_bytes, address);
> +  return TAG_NEW_USABLE (p);
>  }
>  
>  void *
>  __libc_calloc (size_t n, size_t elem_size)
>  {
>    mstate av;
> -  mchunkptr oldtop, p;
> -  INTERNAL_SIZE_T sz, csz, oldtopsize;
> +  mchunkptr oldtop;
> +  INTERNAL_SIZE_T sz, oldtopsize;
>    void *mem;
> +#ifndef USE_MTAG
>    unsigned long clearsize;
>    unsigned long nclears;
>    INTERNAL_SIZE_T *d;
> +#endif
>    ptrdiff_t bytes;
>  
>    if (__glibc_unlikely (__builtin_mul_overflow (n, elem_size, &bytes)))
> @@ -3403,6 +3581,7 @@ __libc_calloc (size_t n, size_t elem_size)
>         __set_errno (ENOMEM);
>         return NULL;
>      }
> +
>    sz = bytes;
>  
>    void *(*hook) (size_t, const void *) =
> @@ -3472,7 +3651,14 @@ __libc_calloc (size_t n, size_t elem_size)
>    if (mem == 0)
>      return 0;
>  
> -  p = mem2chunk (mem);
> +  mchunkptr p = mem2chunk (mem);
> +  /* If we are using memory tagging, then we need to set the tags
> +     regardless of MORECORE_CLEARS, so we zero the whole block while
> +     doing so.  */
> +#ifdef USE_MTAG
> +  return TAG_NEW_MEMSET (mem, 0, CHUNK_AVAILABLE_SIZE (p) - CHUNK_HDR_SZ);
> +#else
> +  INTERNAL_SIZE_T csz = chunksize (p);
>  
>    /* Two optional cases in which clearing not necessary */
>    if (chunk_is_mmapped (p))
> @@ -3483,8 +3669,6 @@ __libc_calloc (size_t n, size_t elem_size)
>        return mem;
>      }
>  
> -  csz = chunksize (p);
> -
>  #if MORECORE_CLEARS
>    if (perturb_byte == 0 && (p == oldtop && csz > oldtopsize))
>      {
> @@ -3527,6 +3711,7 @@ __libc_calloc (size_t n, size_t elem_size)
>      }
>  
>    return mem;
> +#endif
>  }

Clearing using TAG_NEW_MEMSET.  OK.

>  
>  /*
> @@ -3764,10 +3949,10 @@ _int_malloc (mstate av, size_t bytes)
>            size = chunksize (victim);
>            mchunkptr next = chunk_at_offset (victim, size);
>  
> -          if (__glibc_unlikely (size <= 2 * SIZE_SZ)
> +          if (__glibc_unlikely (size <= CHUNK_HDR_SZ)
>                || __glibc_unlikely (size > av->system_mem))
>              malloc_printerr ("malloc(): invalid size (unsorted)");
> -          if (__glibc_unlikely (chunksize_nomask (next) < 2 * SIZE_SZ)
> +          if (__glibc_unlikely (chunksize_nomask (next) < CHUNK_HDR_SZ)
>                || __glibc_unlikely (chunksize_nomask (next) > av->system_mem))
>              malloc_printerr ("malloc(): invalid next size (unsorted)");
>            if (__glibc_unlikely ((prev_size (next) & ~(SIZE_BITS)) != size))
> @@ -4269,7 +4454,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
>        ) {
>  
>      if (__builtin_expect (chunksize_nomask (chunk_at_offset (p, size))
> -			  <= 2 * SIZE_SZ, 0)
> +			  <= CHUNK_HDR_SZ, 0)
>  	|| __builtin_expect (chunksize (chunk_at_offset (p, size))
>  			     >= av->system_mem, 0))
>        {
> @@ -4280,7 +4465,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
>  	if (!have_lock)
>  	  {
>  	    __libc_lock_lock (av->mutex);
> -	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= 2 * SIZE_SZ
> +	    fail = (chunksize_nomask (chunk_at_offset (p, size)) <= CHUNK_HDR_SZ
>  		    || chunksize (chunk_at_offset (p, size)) >= av->system_mem);
>  	    __libc_lock_unlock (av->mutex);
>  	  }
> @@ -4289,7 +4474,7 @@ _int_free (mstate av, mchunkptr p, int have_lock)
>  	  malloc_printerr ("free(): invalid next size (fast)");
>        }
>  
> -    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
> +    free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);
>  
>      atomic_store_relaxed (&av->have_fastchunks, true);
>      unsigned int idx = fastbin_index(size);
> @@ -4358,11 +4543,11 @@ _int_free (mstate av, mchunkptr p, int have_lock)
>        malloc_printerr ("double free or corruption (!prev)");
>  
>      nextsize = chunksize(nextchunk);
> -    if (__builtin_expect (chunksize_nomask (nextchunk) <= 2 * SIZE_SZ, 0)
> +    if (__builtin_expect (chunksize_nomask (nextchunk) <= CHUNK_HDR_SZ, 0)
>  	|| __builtin_expect (nextsize >= av->system_mem, 0))
>        malloc_printerr ("free(): invalid next size (normal)");
>  
> -    free_perturb (chunk2mem(p), size - 2 * SIZE_SZ);
> +    free_perturb (chunk2mem(p), size - CHUNK_HDR_SZ);
>  
>      /* consolidate backward */
>      if (!prev_inuse(p)) {
> @@ -4593,7 +4778,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
>    unsigned long    remainder_size;  /* its size */
>  
>    /* oldmem size */
> -  if (__builtin_expect (chunksize_nomask (oldp) <= 2 * SIZE_SZ, 0)
> +  if (__builtin_expect (chunksize_nomask (oldp) <= CHUNK_HDR_SZ, 0)
>        || __builtin_expect (oldsize >= av->system_mem, 0))
>      malloc_printerr ("realloc(): invalid old size");
>  
> @@ -4604,7 +4789,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
>  
>    next = chunk_at_offset (oldp, oldsize);
>    INTERNAL_SIZE_T nextsize = chunksize (next);
> -  if (__builtin_expect (chunksize_nomask (next) <= 2 * SIZE_SZ, 0)
> +  if (__builtin_expect (chunksize_nomask (next) <= CHUNK_HDR_SZ, 0)
>        || __builtin_expect (nextsize >= av->system_mem, 0))
>      malloc_printerr ("realloc(): invalid next size");
>  
> @@ -4626,7 +4811,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
>            av->top = chunk_at_offset (oldp, nb);
>            set_head (av->top, (newsize - nb) | PREV_INUSE);
>            check_inuse_chunk (av, oldp);
> -          return chunk2mem (oldp);
> +          return TAG_NEW_USABLE (chunk2rawmem (oldp));
>          }
>  
>        /* Try to expand forward into next chunk;  split off remainder below */
> @@ -4659,7 +4844,11 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
>              }
>            else
>              {
> -	      memcpy (newmem, chunk2mem (oldp), oldsize - SIZE_SZ);
> +	      void *oldmem = chunk2mem (oldp);
> +	      newmem = TAG_NEW_USABLE (newmem);
> +	      memcpy (newmem, oldmem,
> +		      CHUNK_AVAILABLE_SIZE (oldp) - CHUNK_HDR_SZ);
> +	      (void) TAG_REGION (chunk2rawmem (oldp), oldsize);
>                _int_free (av, oldp, 1);
>                check_inuse_chunk (av, newp);
>                return chunk2mem (newp);
> @@ -4681,6 +4870,8 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
>    else   /* split remainder */
>      {
>        remainder = chunk_at_offset (newp, nb);
> +      /* Clear any user-space tags before writing the header.  */
> +      remainder = TAG_REGION (remainder, remainder_size);
>        set_head_size (newp, nb | (av != &main_arena ? NON_MAIN_ARENA : 0));
>        set_head (remainder, remainder_size | PREV_INUSE |
>                  (av != &main_arena ? NON_MAIN_ARENA : 0));
> @@ -4690,7 +4881,7 @@ _int_realloc(mstate av, mchunkptr oldp, INTERNAL_SIZE_T oldsize,
>      }
>  
>    check_inuse_chunk (av, newp);
> -  return chunk2mem (newp);
> +  return TAG_NEW_USABLE (chunk2rawmem (newp));
>  }

OK.

>  
>  /*
> @@ -4768,7 +4959,7 @@ _int_memalign (mstate av, size_t alignment, size_t bytes)
>        p = newp;
>  
>        assert (newsize >= nb &&
> -              (((unsigned long) (chunk2mem (p))) % alignment) == 0);
> +              (((unsigned long) (chunk2rawmem (p))) % alignment) == 0);
>      }
>  
>    /* Also give back spare room at the end */
> @@ -4822,7 +5013,8 @@ mtrim (mstate av, size_t pad)
>                                                  + sizeof (struct malloc_chunk)
>                                                  + psm1) & ~psm1);
>  
> -                assert ((char *) chunk2mem (p) + 4 * SIZE_SZ <= paligned_mem);
> +                assert ((char *) chunk2rawmem (p) + 2 * CHUNK_HDR_SZ
> +			<= paligned_mem);
>                  assert ((char *) p + size > paligned_mem);
>  
>                  /* This is the size we could potentially free.  */
> @@ -4885,20 +5077,30 @@ musable (void *mem)
>    mchunkptr p;
>    if (mem != 0)
>      {
> +      size_t result = 0;
> +
>        p = mem2chunk (mem);
>  
>        if (__builtin_expect (using_malloc_checking == 1, 0))
> -        return malloc_check_get_size (p);
> +	return malloc_check_get_size (p);
>  
>        if (chunk_is_mmapped (p))
>  	{
>  	  if (DUMPED_MAIN_ARENA_CHUNK (p))
> -	    return chunksize (p) - SIZE_SZ;
> +	    result = chunksize (p) - SIZE_SZ;
>  	  else
> -	    return chunksize (p) - 2 * SIZE_SZ;
> +	    result = chunksize (p) - CHUNK_HDR_SZ;
>  	}
>        else if (inuse (p))
> -        return chunksize (p) - SIZE_SZ;
> +	result = chunksize (p) - SIZE_SZ;
> +
> +#ifdef USE_MTAG
> +      /* The usable space may be reduced if memory tagging is needed,
> +	 since we cannot share the user-space data with malloc's internal
> +	 data structure.  */
> +      result &= __mtag_granule_mask;
> +#endif
> +      return result;
>      }
>    return 0;
>  }

OK.
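
End to end, the effect on malloc_usable_size is then (my arithmetic,
assuming a 64-bit target and a 16-byte granule):

  void *p = malloc (20);             /* request rounded up to 32 */
  size_t u = malloc_usable_size (p); /* 48-byte chunk: 48 - 8 = 40,
                                        masked down to 32 */

so the reported size never reaches into the following chunk's header.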

> diff --git a/malloc/malloc.h b/malloc/malloc.h
> index b2371f7704..0b20786b58 100644
> --- a/malloc/malloc.h
> +++ b/malloc/malloc.h
> @@ -77,6 +77,13 @@ extern void *pvalloc (size_t __size) __THROW __attribute_malloc__ __wur;
>     contiguous pieces of memory.  */
>  extern void *(*__morecore) (ptrdiff_t __size) __MALLOC_DEPRECATED;
>  
> +#ifdef USE_MTAG
> +extern int __mtag_mmap_flags;
> +#define MTAG_MMAP_FLAGS __mtag_mmap_flags
> +#else
> +#define MTAG_MMAP_FLAGS 0
> +#endif
> +

Do you really need this in an installed header?  If not, maybe just put 
it into include/malloc.h instead.

>  /* Default value of `__morecore'.  */
>  extern void *__default_morecore (ptrdiff_t __size)
>  __THROW __attribute_malloc__  __MALLOC_DEPRECATED;
> diff --git a/sysdeps/generic/libc-mtag.h b/sysdeps/generic/libc-mtag.h
> new file mode 100644
> index 0000000000..07f0203253
> --- /dev/null
> +++ b/sysdeps/generic/libc-mtag.h
> @@ -0,0 +1,52 @@
> +/* libc-internal interface for tagged (colored) memory support.
> +   Copyright (C) 2020 Free Software Foundation, Inc.
> +   This file is part of the GNU C Library.
> +
> +   The GNU C Library is free software; you can redistribute it and/or
> +   modify it under the terms of the GNU Lesser General Public
> +   License as published by the Free Software Foundation; either
> +   version 2.1 of the License, or (at your option) any later version.
> +
> +   The GNU C Library is distributed in the hope that it will be useful,
> +   but WITHOUT ANY WARRANTY; without even the implied warranty of
> +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> +   Lesser General Public License for more details.
> +
> +   You should have received a copy of the GNU Lesser General Public
> +   License along with the GNU C Library; if not, see
> +   <http://www.gnu.org/licenses/>.  */
> +
> +#ifndef _GENERIC_LIBC_MTAG_H
> +#define _GENERIC_LIBC_MTAG_H 1
> +
> +/* Generic bindings for systems that do not support memory tagging.  */
> +
> +/* Used to ensure additional alignment when objects need to have distinct
> +   tags.  */
> +#define __MTAG_GRANULE_SIZE 1
> +
> +/* Non-zero if memory obtained via morecore (sbrk) is not tagged.  */
> +#define __MTAG_SBRK_UNTAGGED 0
> +
> +/* Extra flags to pass to mmap() to request a tagged region of memory.  */
> +#define __MTAG_MMAP_FLAGS 0
> +
> +/* Set the tags for a region of memory, which must have size and alignment
> +   that are multiples of __MTAG_GRANULE_SIZE.  Size cannot be zero.
> +   void *__libc_mtag_tag_region (const void *, size_t)  */
> +#define __libc_mtag_tag_region(p, s) (p)
> +
> +/* Optimized equivalent to __libc_mtag_tag_region followed by memset.  */
> +#define __libc_mtag_memset_with_tag memset
> +
> +/* Convert address P to a pointer that is tagged correctly for that
> +   location.
> +   void *__libc_mtag_address_get_tag (void*)  */
> +#define __libc_mtag_address_get_tag(p) (p)
> +
> +/* Assign a new (random) tag to a pointer P (does not adjust the tag on
> +   the memory addressed).
> +   void *__libc_mtag_new_tag (void*)  */
> +#define __libc_mtag_new_tag(p) (p)
> +
> +#endif /* _GENERIC_LIBC_MTAG_H */
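
The generic header looks fine.  For reference, I'd expect an MTE-capable
port to override these along roughly these lines (my sketch based on the
Linux MTE ABI, not a quote of patch 6/6):

  #define __MTAG_GRANULE_SIZE  16         /* MTE tag granule size */
  #define __MTAG_SBRK_UNTAGGED 1          /* brk memory cannot be tagged */
  #define __MTAG_MMAP_FLAGS    PROT_MTE   /* ask mmap for tagged pages */

with __libc_mtag_tag_region, __libc_mtag_memset_with_tag and
__libc_mtag_new_tag presumably provided as real (assembly) routines.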



Thread overview: 25+ messages
2020-12-18 19:29 [PATCH v4 0/6] Memory tagging support Richard Earnshaw
2020-12-18 19:29 ` [PATCH v4 1/6] config: Allow memory tagging to be enabled when configuring glibc Richard Earnshaw
2020-12-21 12:40   ` Siddhesh Poyarekar
2020-12-18 19:29 ` [PATCH v4 2/6] elf: Add a tunable to control use of tagged memory Richard Earnshaw
2020-12-21 12:42   ` Siddhesh Poyarekar
2020-12-18 19:29 ` [PATCH v4 3/6] malloc: Basic support for memory tagging in the malloc() family Richard Earnshaw
2020-12-21 13:27   ` Siddhesh Poyarekar [this message]
2020-12-21 13:46   ` Florian Weimer
2020-12-21 14:31     ` Richard Earnshaw
2020-12-21 14:31     ` Szabolcs Nagy
2020-12-18 19:29 ` [PATCH v4 4/6] linux: Add compatibility definitions to sys/prctl.h for MTE Richard Earnshaw
2020-12-21 13:32   ` Siddhesh Poyarekar
2020-12-21 13:34     ` Richard Earnshaw
2020-12-21 13:38       ` Siddhesh Poyarekar
2020-12-21 13:39     ` Florian Weimer
2020-12-21 13:41       ` Siddhesh Poyarekar
2020-12-18 19:29 ` [PATCH v4 5/6] aarch64: Add sysv specific enabling code for memory tagging Richard Earnshaw
2020-12-21 12:27   ` Szabolcs Nagy
2020-12-21 13:36   ` Siddhesh Poyarekar
2020-12-18 19:29 ` [PATCH v4 6/6] aarch64: Add aarch64-specific files for memory tagging support Richard Earnshaw
2020-12-21 12:44   ` Szabolcs Nagy
2020-12-21 12:50     ` Richard Earnshaw
2020-12-18 20:18 ` [PATCH v4 0/6] Memory " H.J. Lu
2020-12-21 12:28 ` Siddhesh Poyarekar
2020-12-21 13:44   ` Siddhesh Poyarekar
