public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed
From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: Siddhesh Poyarekar <siddhesh@sourceware.org>, libc-alpha@sourceware.org
Cc: Norbert Manthey <nmanthey@conp-solutions.com>,
	Guillaume Morin <guillaume@morinfr.org>
Subject: Re: [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc
Date: Thu, 19 Aug 2021 09:08:09 -0300	[thread overview]
Message-ID: <1c4f239d-aa52-c39a-3049-b4b53ae49abb@linaro.org> (raw)
In-Reply-To: <a2192ff6-bf92-0a68-b5f0-9cc09eff634b@sourceware.org>



On 18/08/2021 22:03, Siddhesh Poyarekar wrote:
> On 8/18/21 7:50 PM, Adhemerval Zanella via Libc-alpha wrote:
>> A new tunable, 'glibc.malloc.mmap_hugetlb', adds support for using Huge
>> Pages directly with mmap() calls.  The required supported sizes and
>> flags for mmap() are provided by an arch-specific internal hook
>> malloc_hp_config().
>>
>> Currently it first tries mmap() using the huge page size and falls back to
>> the default page size and sbrk() call if the kernel returns MAP_FAILED.
>>
>> The default malloc_hp_config() implementation does not enable it even
>> if the tunable is set.
>>
>> Checked on x86_64-linux-gnu.
>> ---
>>   NEWS                                       |   4 +
>>   elf/dl-tunables.list                       |   4 +
>>   elf/tst-rtld-list-tunables.exp             |   1 +
>>   malloc/arena.c                             |   2 +
>>   malloc/malloc.c                            |  35 +++++-
>>   manual/tunables.texi                       |  14 +++
>>   sysdeps/generic/malloc-hugepages.c         |   6 +
>>   sysdeps/generic/malloc-hugepages.h         |  12 ++
>>   sysdeps/unix/sysv/linux/malloc-hugepages.c | 125 +++++++++++++++++++++
>>   9 files changed, 200 insertions(+), 3 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 9b2345d08c..412bf3e6f8 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -14,6 +14,10 @@ Major new features:
>>     It might improve performance with Transparent Huge Pages madvise mode
>>     depending of the workload.
>>   +* On Linux, a new tunable, glibc.malloc.mmap_hugetlb, can be used to
>> +  instruct malloc to try to use Huge Pages when allocating memory with mmap()
>> +  calls (through the use of MAP_HUGETLB).
>> +
>>   Deprecated and removed features, and other changes affecting compatibility:
>>       [Add deprecations, removals and changes affecting compatibility here]
>> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
>> index 67df6dbc2c..209c2d8592 100644
>> --- a/elf/dl-tunables.list
>> +++ b/elf/dl-tunables.list
>> @@ -97,6 +97,10 @@ glibc {
>>         minval: 0
>>         maxval: 1
>>       }
>> +    mmap_hugetlb {
>> +      type: SIZE_T
>> +      minval: 0
>> +    }
>>     }
>>     cpu {
>>       hwcap_mask {
>> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
>> index d8109fa31c..49f033ce91 100644
>> --- a/elf/tst-rtld-list-tunables.exp
>> +++ b/elf/tst-rtld-list-tunables.exp
>> @@ -1,6 +1,7 @@
>>   glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
>>   glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
>>   glibc.malloc.check: 0 (min: 0, max: 3)
>> +glibc.malloc.mmap_hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
>>   glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
>>   glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
>>   glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
>> diff --git a/malloc/arena.c b/malloc/arena.c
>> index 81bff54303..4efb5581c1 100644
>> --- a/malloc/arena.c
>> +++ b/malloc/arena.c
>> @@ -232,6 +232,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
>>   #endif
>>   TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
>>   TUNABLE_CALLBACK_FNDECL (set_thp_madvise, int32_t)
>> +TUNABLE_CALLBACK_FNDECL (set_mmap_hugetlb, size_t)
>>   #else
>>   /* Initialization routine. */
>>   #include <string.h>
>> @@ -333,6 +334,7 @@ ptmalloc_init (void)
>>   # endif
>>     TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
>>     TUNABLE_GET (thp_madvise, int32_t, TUNABLE_CALLBACK (set_thp_madvise));
>> +  TUNABLE_GET (mmap_hugetlb, size_t, TUNABLE_CALLBACK (set_mmap_hugetlb));
>>   #else
>>     if (__glibc_likely (_environ != NULL))
>>       {
>> diff --git a/malloc/malloc.c b/malloc/malloc.c
>> index 4bfcea286f..8cf2d6855e 100644
>> --- a/malloc/malloc.c
>> +++ b/malloc/malloc.c
>> @@ -1884,6 +1884,10 @@ struct malloc_par
>>   #if HAVE_TUNABLES
>>     /* Transparent Large Page support.  */
>>     INTERNAL_SIZE_T thp_pagesize;
>> +  /* A value different than 0 means to align mmap allocation to hp_pagesize
>> +     and add hp_flags on flags.  */
>> +  INTERNAL_SIZE_T hp_pagesize;
>> +  int hp_flags;
>>   #endif
>>       /* Memory map support */
>> @@ -2415,7 +2419,8 @@ do_check_malloc_state (mstate av)
>>    */
>>     static void *
>> -sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
>> +sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av,
>> +        bool set_thp)
>>   {
>>     long int size;
>>   @@ -2442,7 +2447,8 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
>>     if (mm == MAP_FAILED)
>>       return mm;
>>   -  sysmadvise_thp (mm, size);
>> +  if (set_thp)
>> +    sysmadvise_thp (mm, size);
> 
> If MAP_HUGEPAGE is set in extra_flags then we don't need madvise; there's no need for set_thp.

Alright, we can use it instead.  I just added the flag to avoid the extra
ifdef MAP_HUGEPAGE.

> 
>>       /*
>>       The offset to the start of the mmapped region is stored in the prev_size
>> @@ -2531,7 +2537,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>>         && (mp_.n_mmaps < mp_.n_mmaps_max)))
>>       {
>>       try_mmap:
>> -      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
>> +      char *mm;
>> +#if HAVE_TUNABLES
>> +      if (mp_.hp_pagesize > 0)
>> +    {
>> +      /* There is no need to issue the THP madvise call if Huge Pages are
>> +         used directly.  */
>> +      mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av, false);
>> +      if (mm != MAP_FAILED)
>> +        return mm;
>> +    }
>> +#endif
>> +      mm = sysmalloc_mmap (nb, pagesize, 0, av, true);
> 
> A single tunable ought to allow you to do all this in just sysmalloc_mmap.
> 
>>         if (mm != MAP_FAILED)
>>       return mm;
>>         tried_mmap = true;
>> @@ -5405,6 +5422,18 @@ do_set_thp_madvise (int32_t value)
>>       }
>>     return 0;
>>   }
>> +
>> +static __always_inline int
>> +do_set_mmap_hugetlb (size_t value)
>> +{
>> +  if (value > 0)
>> +    {
>> +      struct malloc_hugepage_config_t cfg = __malloc_hugepage_config (value);
>> +      mp_.hp_pagesize = cfg.pagesize;
>> +      mp_.hp_flags = cfg.flags;
> 
> Instead of making a struct to pass it, you could just pass &mp.hp_pagesize and &mp.hp_flags.  Also, with a single tunable, you do this only when value > 1.  For value == 0, you set the default THP pagesize and set flags to 0.
> 
>> +    }
>> +  return 0;
>> +}
>>   #endif
>>     int

I don't have a strong opinion here; using pointers should work as well.

>> diff --git a/manual/tunables.texi b/manual/tunables.texi
>> index 93c46807f9..4da6a02778 100644
>> --- a/manual/tunables.texi
>> +++ b/manual/tunables.texi
>> @@ -279,6 +279,20 @@ The default value of this tunable is @code{0}, which disable its usage.
>>   Setting to a positive value enable the @code{madvise} call.
>>   @end deftp
>>   +@deftp Tunable glibc.malloc.mmap_hugetlb
>> +This tunable enables the use of Huge Pages when the system supports it (currently
>> +only Linux).  It is done by aligning the memory size and passing the required
>> +flags (@code{MAP_HUGETLB} on Linux) when issuing the @code{mmap} to allocate
>> +memory from the system.
>> +
>> +The default value of this tunable is @code{0}, which disables its usage.
>> +The special value @code{1} will try to gather the system default huge page size,
>> +while a value larger than @code{1} will try to match it against the supported system
>> +huge page sizes.  If either no default huge page size could be obtained or the
>> +requested size does not match any of the supported ones, huge page support will be
>> +disabled.
>> +@end deftp
>> +
>>   @node Dynamic Linking Tunables
>>   @section Dynamic Linking Tunables
>>   @cindex dynamic linking tunables
>> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
>> index 262bcdbeb8..e5f5c1ec98 100644
>> --- a/sysdeps/generic/malloc-hugepages.c
>> +++ b/sysdeps/generic/malloc-hugepages.c
>> @@ -29,3 +29,9 @@ __malloc_thp_mode (void)
>>   {
>>     return malloc_thp_mode_not_supported;
>>   }
>> +
>> +/* Generic fallback: return a zeroed configuration, which disables its usage.  */
>> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
>> +{
>> +  return (struct malloc_hugepage_config_t) { 0, 0 };
>> +}
>> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
>> index 664cda9b67..27f7adfea5 100644
>> --- a/sysdeps/generic/malloc-hugepages.h
>> +++ b/sysdeps/generic/malloc-hugepages.h
>> @@ -34,4 +34,16 @@ enum malloc_thp_mode_t
>>     enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
>>   +struct malloc_hugepage_config_t
>> +{
>> +  size_t pagesize;
>> +  int flags;
>> +};
>> +
>> +/* Return the supported huge page size for the requested PAGESIZE along
>> +   with the required extra mmap flags.  Returning a 0 value for pagesize
>> +   disables its usage.  */
>> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
>> +     attribute_hidden;
>> +
>>   #endif /* _MALLOC_HUGEPAGES_H */
>> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> index 66589127cd..0eb0c764ad 100644
>> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> @@ -17,8 +17,10 @@
>>      not, see <https://www.gnu.org/licenses/>.  */
>>     #include <intprops.h>
>> +#include <dirent.h>
>>   #include <malloc-hugepages.h>
>>   #include <not-cancel.h>
>> +#include <sys/mman.h>
>>     size_t
>>   __malloc_default_thp_pagesize (void)
>> @@ -74,3 +76,126 @@ __malloc_thp_mode (void)
>>       }
>>     return malloc_thp_mode_not_supported;
>>   }
>> +
>> +static size_t
>> +malloc_default_hugepage_size (void)
>> +{
>> +  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
>> +  if (fd == -1)
>> +    return 0;
>> +
>> +  char buf[512];
>> +  off64_t off = 0;
>> +  while (1)
>> +    {
>> +      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
>> +      if (r < 0)
>> +    break;
>> +      buf[r - 1] = '\0';
>> +
>> +      const char *s = strstr (buf, "Hugepagesize:");
>> +      if (s == NULL)
>> +    {
>> +      char *nl = strrchr (buf, '\n');
>> +      if (nl == NULL)
>> +        break;
>> +      off += (nl + 1) - buf;
>> +      continue;
>> +    }
>> +
>> +      /* The default huge page size is in the form:
>> +     Hugepagesize:       NUMBER kB  */
>> +      size_t hpsize = 0;
>> +      s += sizeof ("Hugepagesize: ") - 1;
>> +      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
>> +    {
>> +      if (s[i] == ' ')
>> +        continue;
>> +      hpsize *= 10;
>> +      hpsize += s[i] - '0';
>> +    }
>> +      return hpsize * 1024;
>> +    }
>> +
>> +  __close_nocancel (fd);
>> +
>> +  return 0;
>> +}
>> +
>> +static inline struct malloc_hugepage_config_t
>> +make_malloc_hugepage_config (size_t pagesize)
>> +{
>> +  int flags = MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
>> +  return (struct malloc_hugepage_config_t) { pagesize, flags };
>> +}
>> +
>> +struct malloc_hugepage_config_t
>> +__malloc_hugepage_config (size_t requested)
>> +{
>> +  if (requested == 1)
>> +    {
>> +      size_t pagesize = malloc_default_hugepage_size ();
>> +      if (pagesize != 0)
>> +    return make_malloc_hugepage_config (pagesize);
>> +    }
>> +
>> +  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
>> +                 O_RDONLY | O_DIRECTORY, 0);
>> +  if (dirfd == -1)
>> +    return (struct malloc_hugepage_config_t) { 0, 0 };
>> +
>> +  bool found = false;
>> +
>> +  char buffer[1024];
>> +  while (true)
>> +    {
>> +#if !IS_IN(libc)
>> +# define __getdents64 getdents64
>> +#endif
>> +      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
>> +      if (ret == -1)
>> +    break;
>> +      else if (ret == 0)
>> +        break;
>> +
>> +      char *begin = buffer, *end = buffer + ret;
>> +      while (begin != end)
>> +        {
>> +          unsigned short int d_reclen;
>> +          memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
>> +                  sizeof (d_reclen));
>> +          const char *dname = begin + offsetof (struct dirent64, d_name);
>> +          begin += d_reclen;
>> +
>> +          if (dname[0] == '.'
>> +          || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
>> +            continue;
>> +
>> +      /* Each entry represents a supported huge page in the form of:
>> +         hugepages-<size>kB.  */
>> +      size_t hpsize = 0;
>> +      const char *sizestr = dname + sizeof ("hugepages-") - 1;
>> +      for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
>> +        {
>> +          hpsize *= 10;
>> +          hpsize += sizestr[i] - '0';
>> +        }
>> +      hpsize *= 1024;
>> +
>> +      if (hpsize == requested)
>> +        {
>> +          found = true;
>> +          break;
>> +        }
>> +        }
>> +      if (found)
>> +    break;
>> +    }
>> +
>> +  __close_nocancel (dirfd);
>> +
>> +  if (found)
>> +    return make_malloc_hugepage_config (requested);
>> +
>> +  return (struct malloc_hugepage_config_t) { 0, 0 };
>> +}
>>
> 

  reply	other threads:[~2021-08-19 12:08 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-18 14:19 [PATCH v2 0/4] malloc: Improve Huge Page support Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 1/4] malloc: Add madvise support for Transparent Huge Pages Adhemerval Zanella
2021-08-18 18:42   ` Siddhesh Poyarekar
2021-08-19 12:00     ` Adhemerval Zanella
2021-08-19 12:22       ` Siddhesh Poyarekar
2021-08-18 14:19 ` [PATCH v2 2/4] malloc: Add THP/madvise support for sbrk Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 3/4] malloc: Move mmap logic to its own function Adhemerval Zanella
2021-08-19  0:47   ` Siddhesh Poyarekar
2021-08-18 14:20 ` [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc Adhemerval Zanella
2021-08-19  1:03   ` Siddhesh Poyarekar
2021-08-19 12:08     ` Adhemerval Zanella [this message]
2021-08-19 17:58   ` Matheus Castanho
2021-08-19 18:50     ` Adhemerval Zanella
2021-08-20 12:34       ` Matheus Castanho
2021-08-18 18:11 ` [PATCH v2 0/4] malloc: Improve Huge Page support Siddhesh Poyarekar
2021-08-19 11:26   ` Adhemerval Zanella
2021-08-19 11:48     ` Siddhesh Poyarekar
2021-08-19 12:04       ` Adhemerval Zanella
2021-08-19 12:26         ` Siddhesh Poyarekar
2021-08-19 12:42           ` Adhemerval Zanella
2021-08-19 16:42 ` Guillaume Morin
2021-08-19 16:55   ` Adhemerval Zanella
2021-08-19 17:17     ` Guillaume Morin
2021-08-19 17:27       ` Adhemerval Zanella

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1c4f239d-aa52-c39a-3049-b4b53ae49abb@linaro.org \
    --to=adhemerval.zanella@linaro.org \
    --cc=guillaume@morinfr.org \
    --cc=libc-alpha@sourceware.org \
    --cc=nmanthey@conp-solutions.com \
    --cc=siddhesh@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).