From: Adhemerval Zanella <adhemerval.zanella@linaro.org>
To: Siddhesh Poyarekar <siddhesh@sourceware.org>, libc-alpha@sourceware.org
Cc: Norbert Manthey <nmanthey@conp-solutions.com>,
Guillaume Morin <guillaume@morinfr.org>
Subject: Re: [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc
Date: Thu, 19 Aug 2021 09:08:09 -0300 [thread overview]
Message-ID: <1c4f239d-aa52-c39a-3049-b4b53ae49abb@linaro.org> (raw)
In-Reply-To: <a2192ff6-bf92-0a68-b5f0-9cc09eff634b@sourceware.org>
On 18/08/2021 22:03, Siddhesh Poyarekar wrote:
> On 8/18/21 7:50 PM, Adhemerval Zanella via Libc-alpha wrote:
>> A new tunable, 'glibc.malloc.mmap_hugetlb', adds support to use Huge Page
>> support directly with mmap() calls. The required supported sizes and
>> flags for mmap() are provided by an arch-specific internal hook
>> malloc_hp_config().
>>
>> Currently it first tries mmap() using the huge page size and falls back to
>> the default page size and an sbrk() call if the kernel returns MAP_FAILED.
>>
>> The default malloc_hp_config() implementation does not enable it even
>> if the tunable is set.
>>
>> Checked on x86_64-linux-gnu.
>> ---
>> NEWS | 4 +
>> elf/dl-tunables.list | 4 +
>> elf/tst-rtld-list-tunables.exp | 1 +
>> malloc/arena.c | 2 +
>> malloc/malloc.c | 35 +++++-
>> manual/tunables.texi | 14 +++
>> sysdeps/generic/malloc-hugepages.c | 6 +
>> sysdeps/generic/malloc-hugepages.h | 12 ++
>> sysdeps/unix/sysv/linux/malloc-hugepages.c | 125 +++++++++++++++++++++
>> 9 files changed, 200 insertions(+), 3 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 9b2345d08c..412bf3e6f8 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -14,6 +14,10 @@ Major new features:
>> It might improve performance with Transparent Huge Pages madvise mode
>> depending of the workload.
>> +* On Linux, a new tunable, glibc.malloc.mmap_hugetlb, can be used to
>> + instruct malloc to try to use Huge Pages when allocating memory with mmap()
>> + calls (through the use of MAP_HUGETLB).
>> +
>> Deprecated and removed features, and other changes affecting compatibility:
>> [Add deprecations, removals and changes affecting compatibility here]
>> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
>> index 67df6dbc2c..209c2d8592 100644
>> --- a/elf/dl-tunables.list
>> +++ b/elf/dl-tunables.list
>> @@ -97,6 +97,10 @@ glibc {
>> minval: 0
>> maxval: 1
>> }
>> + mmap_hugetlb {
>> + type: SIZE_T
>> + minval: 0
>> + }
>> }
>> cpu {
>> hwcap_mask {
>> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
>> index d8109fa31c..49f033ce91 100644
>> --- a/elf/tst-rtld-list-tunables.exp
>> +++ b/elf/tst-rtld-list-tunables.exp
>> @@ -1,6 +1,7 @@
>> glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
>> glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
>> glibc.malloc.check: 0 (min: 0, max: 3)
>> +glibc.malloc.mmap_hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
>> glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
>> glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
>> glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
>> diff --git a/malloc/arena.c b/malloc/arena.c
>> index 81bff54303..4efb5581c1 100644
>> --- a/malloc/arena.c
>> +++ b/malloc/arena.c
>> @@ -232,6 +232,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
>> #endif
>> TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
>> TUNABLE_CALLBACK_FNDECL (set_thp_madvise, int32_t)
>> +TUNABLE_CALLBACK_FNDECL (set_mmap_hugetlb, size_t)
>> #else
>> /* Initialization routine. */
>> #include <string.h>
>> @@ -333,6 +334,7 @@ ptmalloc_init (void)
>> # endif
>> TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
>> TUNABLE_GET (thp_madvise, int32_t, TUNABLE_CALLBACK (set_thp_madvise));
>> + TUNABLE_GET (mmap_hugetlb, size_t, TUNABLE_CALLBACK (set_mmap_hugetlb));
>> #else
>> if (__glibc_likely (_environ != NULL))
>> {
>> diff --git a/malloc/malloc.c b/malloc/malloc.c
>> index 4bfcea286f..8cf2d6855e 100644
>> --- a/malloc/malloc.c
>> +++ b/malloc/malloc.c
>> @@ -1884,6 +1884,10 @@ struct malloc_par
>> #if HAVE_TUNABLES
>> /* Transparent Large Page support. */
>> INTERNAL_SIZE_T thp_pagesize;
>> + /* A value different than 0 means to align mmap allocation to hp_pagesize
>> + add hp_flags on flags. */
>> + INTERNAL_SIZE_T hp_pagesize;
>> + int hp_flags;
>> #endif
>> /* Memory map support */
>> @@ -2415,7 +2419,8 @@ do_check_malloc_state (mstate av)
>> */
>> static void *
>> -sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
>> +sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av,
>> + bool set_thp)
>> {
>> long int size;
>> @@ -2442,7 +2447,8 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
>> if (mm == MAP_FAILED)
>> return mm;
>> - sysmadvise_thp (mm, size);
>> + if (set_thp)
>> + sysmadvise_thp (mm, size);
>
> If MAP_HUGETLB is set in extra_flags then we don't need madvise; there's no need for set_thp.
Alright, we can use it instead. I just added the flag to avoid the extra
#ifdef on MAP_HUGETLB.
>
>> /*
>> The offset to the start of the mmapped region is stored in the prev_size
>> @@ -2531,7 +2537,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
>> && (mp_.n_mmaps < mp_.n_mmaps_max)))
>> {
>> try_mmap:
>> - char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
>> + char *mm;
>> +#if HAVE_TUNABLES
>> + if (mp_.hp_pagesize > 0)
>> + {
>> + /* There is no need to issue the THP madvise call if Huge Pages are
>> + used directly. */
>> + mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av, false);
>> + if (mm != MAP_FAILED)
>> + return mm;
>> + }
>> +#endif
>> + mm = sysmalloc_mmap (nb, pagesize, 0, av, true);
>
> A single tunable ought to allow you to do all this in just sysmalloc_mmap.
>
>> if (mm != MAP_FAILED)
>> return mm;
>> tried_mmap = true;
>> @@ -5405,6 +5422,18 @@ do_set_thp_madvise (int32_t value)
>> }
>> return 0;
>> }
>> +
>> +static __always_inline int
>> +do_set_mmap_hugetlb (size_t value)
>> +{
>> + if (value > 0)
>> + {
>> + struct malloc_hugepage_config_t cfg = __malloc_hugepage_config (value);
>> + mp_.hp_pagesize = cfg.pagesize;
>> + mp_.hp_flags = cfg.flags;
>
> Instead of making a struct to pass it, you could just pass &mp.hp_pagesize and &mp.hp_flags. Also, with a single tunable, you do this only when value > 1. For value == 0, you set the default THP pagesize and set flags to 0.
>
>> + }
>> + return 0;
>> +}
>> #endif
>> int
I don't have a strong opinion here; using pointers should work as well.
>> diff --git a/manual/tunables.texi b/manual/tunables.texi
>> index 93c46807f9..4da6a02778 100644
>> --- a/manual/tunables.texi
>> +++ b/manual/tunables.texi
>> @@ -279,6 +279,20 @@ The default value of this tunable is @code{0}, which disable its usage.
>> Setting to a positive value enable the @code{madvise} call.
>> @end deftp
>> +@deftp Tunable glibc.malloc.mmap_hugetlb
>> +This tunable enables the use of Huge Pages when the system supports it (currently
>> +only Linux). It is done by aligning the memory size and passing the required
>> +flags (@code{MAP_HUGETLB} on Linux) when issuing the @code{mmap} to allocate
>> +memory from the system.
>> +
>> +The default value of this tunable is @code{0}, which disables its usage.
>> +The special value @code{1} will try to gather the system default huge page size,
>> +while a value larger than @code{1} will try to match it with the supported system
>> +huge page size. If either no default huge page size could be obtained or if the
>> +requested size does not match the supported ones, huge page support will be
>> +disabled.
>> +@end deftp
>> +
>> @node Dynamic Linking Tunables
>> @section Dynamic Linking Tunables
>> @cindex dynamic linking tunables
>> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
>> index 262bcdbeb8..e5f5c1ec98 100644
>> --- a/sysdeps/generic/malloc-hugepages.c
>> +++ b/sysdeps/generic/malloc-hugepages.c
>> @@ -29,3 +29,9 @@ __malloc_thp_mode (void)
>> {
>> return malloc_thp_mode_not_supported;
>> }
>> +
>> +/* Return the default transparent huge page size. */
>> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
>> +{
>> + return (struct malloc_hugepage_config_t) { 0, 0 };
>> +}
>> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
>> index 664cda9b67..27f7adfea5 100644
>> --- a/sysdeps/generic/malloc-hugepages.h
>> +++ b/sysdeps/generic/malloc-hugepages.h
>> @@ -34,4 +34,16 @@ enum malloc_thp_mode_t
>> enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
>> +struct malloc_hugepage_config_t
>> +{
>> + size_t pagesize;
>> + int flags;
>> +};
>> +
>> +/* Returned the support huge page size from the requested PAGESIZE along
>> + with the requires extra mmap flags. Returning a 0 value for pagesize
>> + disables its usage. */
>> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
>> + attribute_hidden;
>> +
>> #endif /* _MALLOC_HUGEPAGES_H */
>> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> index 66589127cd..0eb0c764ad 100644
>> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
>> @@ -17,8 +17,10 @@
>> not, see <https://www.gnu.org/licenses/>. */
>> #include <intprops.h>
>> +#include <dirent.h>
>> #include <malloc-hugepages.h>
>> #include <not-cancel.h>
>> +#include <sys/mman.h>
>> size_t
>> __malloc_default_thp_pagesize (void)
>> @@ -74,3 +76,126 @@ __malloc_thp_mode (void)
>> }
>> return malloc_thp_mode_not_supported;
>> }
>> +
>> +static size_t
>> +malloc_default_hugepage_size (void)
>> +{
>> + int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
>> + if (fd == -1)
>> + return 0;
>> +
>> + char buf[512];
>> + off64_t off = 0;
>> + while (1)
>> + {
>> + ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
>> + if (r < 0)
>> + break;
>> + buf[r - 1] = '\0';
>> +
>> + const char *s = strstr (buf, "Hugepagesize:");
>> + if (s == NULL)
>> + {
>> + char *nl = strrchr (buf, '\n');
>> + if (nl == NULL)
>> + break;
>> + off += (nl + 1) - buf;
>> + continue;
>> + }
>> +
>> + /* The default huge page size is in the form:
>> + Hugepagesize: NUMBER kB */
>> + size_t hpsize = 0;
>> + s += sizeof ("Hugepagesize: ") - 1;
>> + for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
>> + {
>> + if (s[i] == ' ')
>> + continue;
>> + hpsize *= 10;
>> + hpsize += s[i] - '0';
>> + }
>> + return hpsize * 1024;
>> + }
>> +
>> + __close_nocancel (fd);
>> +
>> + return 0;
>> +}
>> +
>> +static inline struct malloc_hugepage_config_t
>> +make_malloc_hugepage_config (size_t pagesize)
>> +{
>> + int flags = MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
>> + return (struct malloc_hugepage_config_t) { pagesize, flags };
>> +}
>> +
>> +struct malloc_hugepage_config_t
>> +__malloc_hugepage_config (size_t requested)
>> +{
>> + if (requested == 1)
>> + {
>> + size_t pagesize = malloc_default_hugepage_size ();
>> + if (pagesize != 0)
>> + return make_malloc_hugepage_config (pagesize);
>> + }
>> +
>> + int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
>> + O_RDONLY | O_DIRECTORY, 0);
>> + if (dirfd == -1)
>> + return (struct malloc_hugepage_config_t) { 0, 0 };
>> +
>> + bool found = false;
>> +
>> + char buffer[1024];
>> + while (true)
>> + {
>> +#if !IS_IN(libc)
>> +# define __getdents64 getdents64
>> +#endif
>> + ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
>> + if (ret == -1)
>> + break;
>> + else if (ret == 0)
>> + break;
>> +
>> + char *begin = buffer, *end = buffer + ret;
>> + while (begin != end)
>> + {
>> + unsigned short int d_reclen;
>> + memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
>> + sizeof (d_reclen));
>> + const char *dname = begin + offsetof (struct dirent64, d_name);
>> + begin += d_reclen;
>> +
>> + if (dname[0] == '.'
>> + || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
>> + continue;
>> +
>> + /* Each entry represents a supported huge page in the form of:
>> + hugepages-<size>kB. */
>> + size_t hpsize = 0;
>> + const char *sizestr = dname + sizeof ("hugepages-") - 1;
>> + for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
>> + {
>> + hpsize *= 10;
>> + hpsize += sizestr[i] - '0';
>> + }
>> + hpsize *= 1024;
>> +
>> + if (hpsize == requested)
>> + {
>> + found = true;
>> + break;
>> + }
>> + }
>> + if (found)
>> + break;
>> + }
>> +
>> + __close_nocancel (dirfd);
>> +
>> + if (found)
>> + return make_malloc_hugepage_config (requested);
>> +
>> + return (struct malloc_hugepage_config_t) { 0, 0 };
>> +}
>>
>
next prev parent reply other threads:[~2021-08-19 12:08 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-18 14:19 [PATCH v2 0/4] malloc: Improve Huge Page support Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 1/4] malloc: Add madvise support for Transparent Huge Pages Adhemerval Zanella
2021-08-18 18:42 ` Siddhesh Poyarekar
2021-08-19 12:00 ` Adhemerval Zanella
2021-08-19 12:22 ` Siddhesh Poyarekar
2021-08-18 14:19 ` [PATCH v2 2/4] malloc: Add THP/madvise support for sbrk Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 3/4] malloc: Move mmap logic to its own function Adhemerval Zanella
2021-08-19 0:47 ` Siddhesh Poyarekar
2021-08-18 14:20 ` [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc Adhemerval Zanella
2021-08-19 1:03 ` Siddhesh Poyarekar
2021-08-19 12:08 ` Adhemerval Zanella [this message]
2021-08-19 17:58 ` Matheus Castanho
2021-08-19 18:50 ` Adhemerval Zanella
2021-08-20 12:34 ` Matheus Castanho
2021-08-18 18:11 ` [PATCH v2 0/4] malloc: Improve Huge Page support Siddhesh Poyarekar
2021-08-19 11:26 ` Adhemerval Zanella
2021-08-19 11:48 ` Siddhesh Poyarekar
2021-08-19 12:04 ` Adhemerval Zanella
2021-08-19 12:26 ` Siddhesh Poyarekar
2021-08-19 12:42 ` Adhemerval Zanella
2021-08-19 16:42 ` Guillaume Morin
2021-08-19 16:55 ` Adhemerval Zanella
2021-08-19 17:17 ` Guillaume Morin
2021-08-19 17:27 ` Adhemerval Zanella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1c4f239d-aa52-c39a-3049-b4b53ae49abb@linaro.org \
--to=adhemerval.zanella@linaro.org \
--cc=guillaume@morinfr.org \
--cc=libc-alpha@sourceware.org \
--cc=nmanthey@conp-solutions.com \
--cc=siddhesh@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).