public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Siddhesh Poyarekar <siddhesh@sourceware.org>
To: Adhemerval Zanella <adhemerval.zanella@linaro.org>,
	libc-alpha@sourceware.org
Cc: Norbert Manthey <nmanthey@conp-solutions.com>,
	Guillaume Morin <guillaume@morinfr.org>
Subject: Re: [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc
Date: Thu, 19 Aug 2021 06:33:49 +0530	[thread overview]
Message-ID: <a2192ff6-bf92-0a68-b5f0-9cc09eff634b@sourceware.org> (raw)
In-Reply-To: <20210818142000.128752-5-adhemerval.zanella@linaro.org>

On 8/18/21 7:50 PM, Adhemerval Zanella via Libc-alpha wrote:
> A new tunable, 'glibc.malloc.mmap_hugetlb', adds support to use Huge Page
> support directly with mmap() calls.  The required supported sizes and
> flags for mmap() are provided by an arch-specific internal hook
> malloc_hp_config().
> 
> Currently it first tries mmap() using the huge page size and falls back to
> the default page size and the sbrk() call if the kernel returns MAP_FAILED.
> 
> The default malloc_hp_config() implementation does not enable it even
> if the tunable is set.
> 
> Checked on x86_64-linux-gnu.
> ---
>   NEWS                                       |   4 +
>   elf/dl-tunables.list                       |   4 +
>   elf/tst-rtld-list-tunables.exp             |   1 +
>   malloc/arena.c                             |   2 +
>   malloc/malloc.c                            |  35 +++++-
>   manual/tunables.texi                       |  14 +++
>   sysdeps/generic/malloc-hugepages.c         |   6 +
>   sysdeps/generic/malloc-hugepages.h         |  12 ++
>   sysdeps/unix/sysv/linux/malloc-hugepages.c | 125 +++++++++++++++++++++
>   9 files changed, 200 insertions(+), 3 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 9b2345d08c..412bf3e6f8 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -14,6 +14,10 @@ Major new features:
>     It might improve performance with Transparent Huge Pages madvise mode
>     depending of the workload.
>   
> +* On Linux, a new tunable, glibc.malloc.mmap_hugetlb, can be used to
> +  instruct malloc to try use Huge Pages when allocate memory with mmap()
> +  calls (through the use of MAP_HUGETLB).
> +
>   Deprecated and removed features, and other changes affecting compatibility:
>   
>     [Add deprecations, removals and changes affecting compatibility here]
> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
> index 67df6dbc2c..209c2d8592 100644
> --- a/elf/dl-tunables.list
> +++ b/elf/dl-tunables.list
> @@ -97,6 +97,10 @@ glibc {
>         minval: 0
>         maxval: 1
>       }
> +    mmap_hugetlb {
> +      type: SIZE_T
> +      minval: 0
> +    }
>     }
>     cpu {
>       hwcap_mask {
> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
> index d8109fa31c..49f033ce91 100644
> --- a/elf/tst-rtld-list-tunables.exp
> +++ b/elf/tst-rtld-list-tunables.exp
> @@ -1,6 +1,7 @@
>   glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
>   glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
>   glibc.malloc.check: 0 (min: 0, max: 3)
> +glibc.malloc.mmap_hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
>   glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
>   glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
>   glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
> diff --git a/malloc/arena.c b/malloc/arena.c
> index 81bff54303..4efb5581c1 100644
> --- a/malloc/arena.c
> +++ b/malloc/arena.c
> @@ -232,6 +232,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
>   #endif
>   TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
>   TUNABLE_CALLBACK_FNDECL (set_thp_madvise, int32_t)
> +TUNABLE_CALLBACK_FNDECL (set_mmap_hugetlb, size_t)
>   #else
>   /* Initialization routine. */
>   #include <string.h>
> @@ -333,6 +334,7 @@ ptmalloc_init (void)
>   # endif
>     TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
>     TUNABLE_GET (thp_madvise, int32_t, TUNABLE_CALLBACK (set_thp_madvise));
> +  TUNABLE_GET (mmap_hugetlb, size_t, TUNABLE_CALLBACK (set_mmap_hugetlb));
>   #else
>     if (__glibc_likely (_environ != NULL))
>       {
> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index 4bfcea286f..8cf2d6855e 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -1884,6 +1884,10 @@ struct malloc_par
>   #if HAVE_TUNABLES
>     /* Transparent Large Page support.  */
>     INTERNAL_SIZE_T thp_pagesize;
> +  /* A value different than 0 means to align mmap allocation to hp_pagesize
> +     add hp_flags on flags.  */
> +  INTERNAL_SIZE_T hp_pagesize;
> +  int hp_flags;
>   #endif
>   
>     /* Memory map support */
> @@ -2415,7 +2419,8 @@ do_check_malloc_state (mstate av)
>    */
>   
>   static void *
> -sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
> +sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av,
> +		bool set_thp)
>   {
>     long int size;
>   
> @@ -2442,7 +2447,8 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
>     if (mm == MAP_FAILED)
>       return mm;
>   
> -  sysmadvise_thp (mm, size);
> +  if (set_thp)
> +    sysmadvise_thp (mm, size);

If MAP_HUGETLB is set in extra_flags then we don't need madvise;
there's no need for set_thp.

>   
>     /*
>       The offset to the start of the mmapped region is stored in the prev_size
> @@ -2531,7 +2537,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>   	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
>       {
>       try_mmap:
> -      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
> +      char *mm;
> +#if HAVE_TUNABLES
> +      if (mp_.hp_pagesize > 0)
> +	{
> +	  /* There is no need to isse the THP madvise call if Huge Pages are
> +	     used directly.  */
> +	  mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av, false);
> +	  if (mm != MAP_FAILED)
> +	    return mm;
> +	}
> +#endif
> +      mm = sysmalloc_mmap (nb, pagesize, 0, av, true);

A single tunable ought to allow you to do all this in just sysmalloc_mmap.

>         if (mm != MAP_FAILED)
>   	return mm;
>         tried_mmap = true;
> @@ -5405,6 +5422,18 @@ do_set_thp_madvise (int32_t value)
>       }
>     return 0;
>   }
> +
> +static __always_inline int
> +do_set_mmap_hugetlb (size_t value)
> +{
> +  if (value > 0)
> +    {
> +      struct malloc_hugepage_config_t cfg = __malloc_hugepage_config (value);
> +      mp_.hp_pagesize = cfg.pagesize;
> +      mp_.hp_flags = cfg.flags;

Instead of making a struct to pass it, you could just pass
&mp_.hp_pagesize and &mp_.hp_flags.  Also, with a single tunable, you do
this only when value > 1.  For value == 0, you set the default THP
pagesize and set flags to 0.

> +    }
> +  return 0;
> +}
>   #endif
>   
>   int
> diff --git a/manual/tunables.texi b/manual/tunables.texi
> index 93c46807f9..4da6a02778 100644
> --- a/manual/tunables.texi
> +++ b/manual/tunables.texi
> @@ -279,6 +279,20 @@ The default value of this tunable is @code{0}, which disable its usage.
>   Setting to a positive value enable the @code{madvise} call.
>   @end deftp
>   
> +@deftp Tunable glibc.malloc.mmap_hugetlb
> +This tunable enable the use of Huge Pages when the system supports it (currently
> +only Linux).  It is done by aligning the memory size and passing the required
> +flags (@code{MAP_HUGETLB} on Linux) when issuing the @code{mmap} to allocate
> +memory from the system.
> +
> +The default value of this tunable is @code{0}, which disable its usage.
> +The special value @code{1} will try to gather the system default huge page size,
> +while a value larger than @code{1} will try to match it with the supported system
> +huge page size.  If either no default huge page size could be obtained or if the
> +requested size does not match the supported ones, the huge pages supports will be
> +disabled.
> +@end deftp
> +
>   @node Dynamic Linking Tunables
>   @section Dynamic Linking Tunables
>   @cindex dynamic linking tunables
> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
> index 262bcdbeb8..e5f5c1ec98 100644
> --- a/sysdeps/generic/malloc-hugepages.c
> +++ b/sysdeps/generic/malloc-hugepages.c
> @@ -29,3 +29,9 @@ __malloc_thp_mode (void)
>   {
>     return malloc_thp_mode_not_supported;
>   }
> +
> +/* Return the default transparent huge page size.  */
> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
> +{
> +  return (struct malloc_hugepage_config_t) { 0, 0 };
> +}
> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
> index 664cda9b67..27f7adfea5 100644
> --- a/sysdeps/generic/malloc-hugepages.h
> +++ b/sysdeps/generic/malloc-hugepages.h
> @@ -34,4 +34,16 @@ enum malloc_thp_mode_t
>   
>   enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
>   
> +struct malloc_hugepage_config_t
> +{
> +  size_t pagesize;
> +  int flags;
> +};
> +
> +/* Returned the support huge page size from the requested PAGESIZE along
> +   with the requires extra mmap flags.  Returning a 0 value for pagesize
> +   disables its usage.  */
> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
> +     attribute_hidden;
> +
>   #endif /* _MALLOC_HUGEPAGES_H */
> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> index 66589127cd..0eb0c764ad 100644
> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> @@ -17,8 +17,10 @@
>      not, see <https://www.gnu.org/licenses/>.  */
>   
>   #include <intprops.h>
> +#include <dirent.h>
>   #include <malloc-hugepages.h>
>   #include <not-cancel.h>
> +#include <sys/mman.h>
>   
>   size_t
>   __malloc_default_thp_pagesize (void)
> @@ -74,3 +76,126 @@ __malloc_thp_mode (void)
>       }
>     return malloc_thp_mode_not_supported;
>   }
> +
> +static size_t
> +malloc_default_hugepage_size (void)
> +{
> +  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
> +  if (fd == -1)
> +    return 0;
> +
> +  char buf[512];
> +  off64_t off = 0;
> +  while (1)
> +    {
> +      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
> +      if (r < 0)
> +	break;
> +      buf[r - 1] = '\0';
> +
> +      const char *s = strstr (buf, "Hugepagesize:");
> +      if (s == NULL)
> +	{
> +	  char *nl = strrchr (buf, '\n');
> +	  if (nl == NULL)
> +	    break;
> +	  off += (nl + 1) - buf;
> +	  continue;
> +	}
> +
> +      /* The default huge page size is in the form:
> +	 Hugepagesize:       NUMBER kB  */
> +      size_t hpsize = 0;
> +      s += sizeof ("Hugepagesize: ") - 1;
> +      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
> +	{
> +	  if (s[i] == ' ')
> +	    continue;
> +	  hpsize *= 10;
> +	  hpsize += s[i] - '0';
> +	}
> +      return hpsize * 1024;
> +    }
> +
> +  __close_nocancel (fd);
> +
> +  return 0;
> +}
> +
> +static inline struct malloc_hugepage_config_t
> +make_malloc_hugepage_config (size_t pagesize)
> +{
> +  int flags = MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
> +  return (struct malloc_hugepage_config_t) { pagesize, flags };
> +}
> +
> +struct malloc_hugepage_config_t
> +__malloc_hugepage_config (size_t requested)
> +{
> +  if (requested == 1)
> +    {
> +      size_t pagesize = malloc_default_hugepage_size ();
> +      if (pagesize != 0)
> +	return make_malloc_hugepage_config (pagesize);
> +    }
> +
> +  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
> +				 O_RDONLY | O_DIRECTORY, 0);
> +  if (dirfd == -1)
> +    return (struct malloc_hugepage_config_t) { 0, 0 };
> +
> +  bool found = false;
> +
> +  char buffer[1024];
> +  while (true)
> +    {
> +#if !IS_IN(libc)
> +# define __getdents64 getdents64
> +#endif
> +      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
> +      if (ret == -1)
> +	break;
> +      else if (ret == 0)
> +        break;
> +
> +      char *begin = buffer, *end = buffer + ret;
> +      while (begin != end)
> +        {
> +          unsigned short int d_reclen;
> +          memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
> +                  sizeof (d_reclen));
> +          const char *dname = begin + offsetof (struct dirent64, d_name);
> +          begin += d_reclen;
> +
> +          if (dname[0] == '.'
> +	      || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
> +            continue;
> +
> +	  /* Each entry represents a supported huge page in the form of:
> +	     hugepages-<size>kB.  */
> +	  size_t hpsize = 0;
> +	  const char *sizestr = dname + sizeof ("hugepages-") - 1;
> +	  for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
> +	    {
> +	      hpsize *= 10;
> +	      hpsize += sizestr[i] - '0';
> +	    }
> +	  hpsize *= 1024;
> +
> +	  if (hpsize == requested)
> +	    {
> +	      found = true;
> +	      break;
> +	    }
> +        }
> +      if (found)
> +	break;
> +    }
> +
> +  __close_nocancel (dirfd);
> +
> +  if (found)
> +    return make_malloc_hugepage_config (requested);
> +
> +  return (struct malloc_hugepage_config_t) { 0, 0 };
> +}
> 


  reply	other threads:[~2021-08-19  1:04 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-18 14:19 [PATCH v2 0/4] malloc: Improve Huge Page support Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 1/4] malloc: Add madvise support for Transparent Huge Pages Adhemerval Zanella
2021-08-18 18:42   ` Siddhesh Poyarekar
2021-08-19 12:00     ` Adhemerval Zanella
2021-08-19 12:22       ` Siddhesh Poyarekar
2021-08-18 14:19 ` [PATCH v2 2/4] malloc: Add THP/madvise support for sbrk Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 3/4] malloc: Move mmap logic to its own function Adhemerval Zanella
2021-08-19  0:47   ` Siddhesh Poyarekar
2021-08-18 14:20 ` [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc Adhemerval Zanella
2021-08-19  1:03   ` Siddhesh Poyarekar [this message]
2021-08-19 12:08     ` Adhemerval Zanella
2021-08-19 17:58   ` Matheus Castanho
2021-08-19 18:50     ` Adhemerval Zanella
2021-08-20 12:34       ` Matheus Castanho
2021-08-18 18:11 ` [PATCH v2 0/4] malloc: Improve Huge Page support Siddhesh Poyarekar
2021-08-19 11:26   ` Adhemerval Zanella
2021-08-19 11:48     ` Siddhesh Poyarekar
2021-08-19 12:04       ` Adhemerval Zanella
2021-08-19 12:26         ` Siddhesh Poyarekar
2021-08-19 12:42           ` Adhemerval Zanella
2021-08-19 16:42 ` Guillaume Morin
2021-08-19 16:55   ` Adhemerval Zanella
2021-08-19 17:17     ` Guillaume Morin
2021-08-19 17:27       ` Adhemerval Zanella

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a2192ff6-bf92-0a68-b5f0-9cc09eff634b@sourceware.org \
    --to=siddhesh@sourceware.org \
    --cc=adhemerval.zanella@linaro.org \
    --cc=guillaume@morinfr.org \
    --cc=libc-alpha@sourceware.org \
    --cc=nmanthey@conp-solutions.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).