From: Matheus Castanho <msc@linux.ibm.com>
To: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Cc: Norbert Manthey <nmanthey@conp-solutions.com>,
Guillaume Morin <guillaume@morinfr.org>,
Siddhesh Poyarekar <siddhesh@sourceware.org>,
libc-alpha@sourceware.org,
Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
Subject: Re: [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc
Date: Thu, 19 Aug 2021 14:58:04 -0300 [thread overview]
Message-ID: <871r6pjp03.fsf@linux.ibm.com> (raw)
In-Reply-To: <20210818142000.128752-5-adhemerval.zanella@linaro.org>
[-- Attachment #1: Type: text/plain, Size: 12329 bytes --]
Adhemerval Zanella via Libc-alpha <libc-alpha@sourceware.org> writes:
> A new tunable, 'glibc.malloc.mmap_hugetlb', adds support to use Huge Page
> support directly with mmap() calls. The required supported sizes and
> flags for mmap() are provided by an arch-specific internal hook
> malloc_hp_config().
>
> Currently it first tries mmap() using the huge page size and falls back to
> the default page size and sbrk() call if the kernel returns MAP_FAILED.
>
> The default malloc_hp_config() implementation does not enable it even
> if the tunable is set.
>
> Checked on x86_64-linux-gnu.
> ---
> NEWS | 4 +
> elf/dl-tunables.list | 4 +
> elf/tst-rtld-list-tunables.exp | 1 +
> malloc/arena.c | 2 +
> malloc/malloc.c | 35 +++++-
> manual/tunables.texi | 14 +++
> sysdeps/generic/malloc-hugepages.c | 6 +
> sysdeps/generic/malloc-hugepages.h | 12 ++
> sysdeps/unix/sysv/linux/malloc-hugepages.c | 125 +++++++++++++++++++++
> 9 files changed, 200 insertions(+), 3 deletions(-)
>
> diff --git a/NEWS b/NEWS
> index 9b2345d08c..412bf3e6f8 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -14,6 +14,10 @@ Major new features:
> It might improve performance with Transparent Huge Pages madvise mode
> depending of the workload.
>
> +* On Linux, a new tunable, glibc.malloc.mmap_hugetlb, can be used to
> + instruct malloc to try to use Huge Pages when allocating memory with mmap()
> + calls (through the use of MAP_HUGETLB).
> +
> Deprecated and removed features, and other changes affecting compatibility:
>
> [Add deprecations, removals and changes affecting compatibility here]
> diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
> index 67df6dbc2c..209c2d8592 100644
> --- a/elf/dl-tunables.list
> +++ b/elf/dl-tunables.list
> @@ -97,6 +97,10 @@ glibc {
> minval: 0
> maxval: 1
> }
> + mmap_hugetlb {
> + type: SIZE_T
> + minval: 0
> + }
> }
> cpu {
> hwcap_mask {
> diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
> index d8109fa31c..49f033ce91 100644
> --- a/elf/tst-rtld-list-tunables.exp
> +++ b/elf/tst-rtld-list-tunables.exp
> @@ -1,6 +1,7 @@
> glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
> glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
> glibc.malloc.check: 0 (min: 0, max: 3)
> +glibc.malloc.mmap_hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
> glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
> glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
> glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
> diff --git a/malloc/arena.c b/malloc/arena.c
> index 81bff54303..4efb5581c1 100644
> --- a/malloc/arena.c
> +++ b/malloc/arena.c
> @@ -232,6 +232,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
> #endif
> TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
> TUNABLE_CALLBACK_FNDECL (set_thp_madvise, int32_t)
> +TUNABLE_CALLBACK_FNDECL (set_mmap_hugetlb, size_t)
> #else
> /* Initialization routine. */
> #include <string.h>
> @@ -333,6 +334,7 @@ ptmalloc_init (void)
> # endif
> TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
> TUNABLE_GET (thp_madvise, int32_t, TUNABLE_CALLBACK (set_thp_madvise));
> + TUNABLE_GET (mmap_hugetlb, size_t, TUNABLE_CALLBACK (set_mmap_hugetlb));
> #else
> if (__glibc_likely (_environ != NULL))
> {
> diff --git a/malloc/malloc.c b/malloc/malloc.c
> index 4bfcea286f..8cf2d6855e 100644
> --- a/malloc/malloc.c
> +++ b/malloc/malloc.c
> @@ -1884,6 +1884,10 @@ struct malloc_par
> #if HAVE_TUNABLES
> /* Transparent Large Page support. */
> INTERNAL_SIZE_T thp_pagesize;
> + /* A value different than 0 means to align mmap allocation to hp_pagesize
> + and add hp_flags to the mmap flags. */
> + INTERNAL_SIZE_T hp_pagesize;
> + int hp_flags;
> #endif
>
> /* Memory map support */
> @@ -2415,7 +2419,8 @@ do_check_malloc_state (mstate av)
> */
>
> static void *
> -sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
> +sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av,
> + bool set_thp)
> {
> long int size;
>
> @@ -2442,7 +2447,8 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
> if (mm == MAP_FAILED)
> return mm;
>
> - sysmadvise_thp (mm, size);
> + if (set_thp)
> + sysmadvise_thp (mm, size);
>
> /*
> The offset to the start of the mmapped region is stored in the prev_size
> @@ -2531,7 +2537,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
> && (mp_.n_mmaps < mp_.n_mmaps_max)))
> {
> try_mmap:
> - char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
> + char *mm;
> +#if HAVE_TUNABLES
> + if (mp_.hp_pagesize > 0)
> + {
> + /* There is no need to issue the THP madvise call if Huge Pages are
> + used directly. */
> + mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av, false);
> + if (mm != MAP_FAILED)
> + return mm;
> + }
> +#endif
> + mm = sysmalloc_mmap (nb, pagesize, 0, av, true);
> if (mm != MAP_FAILED)
> return mm;
> tried_mmap = true;
> @@ -5405,6 +5422,18 @@ do_set_thp_madvise (int32_t value)
> }
> return 0;
> }
> +
> +static __always_inline int
> +do_set_mmap_hugetlb (size_t value)
> +{
> + if (value > 0)
> + {
> + struct malloc_hugepage_config_t cfg = __malloc_hugepage_config (value);
> + mp_.hp_pagesize = cfg.pagesize;
> + mp_.hp_flags = cfg.flags;
> + }
> + return 0;
> +}
> #endif
>
> int
> diff --git a/manual/tunables.texi b/manual/tunables.texi
> index 93c46807f9..4da6a02778 100644
> --- a/manual/tunables.texi
> +++ b/manual/tunables.texi
> @@ -279,6 +279,20 @@ The default value of this tunable is @code{0}, which disable its usage.
> Setting to a positive value enable the @code{madvise} call.
> @end deftp
>
> +@deftp Tunable glibc.malloc.mmap_hugetlb
> +This tunable enables the use of Huge Pages when the system supports it (currently
> +only Linux). It is done by aligning the memory size and passing the required
> +flags (@code{MAP_HUGETLB} on Linux) when issuing the @code{mmap} to allocate
> +memory from the system.
> +
> +The default value of this tunable is @code{0}, which disables its usage.
> +The special value @code{1} will try to gather the system default huge page size,
> +while a value larger than @code{1} will try to match it with the supported system
> +huge page sizes. If either no default huge page size could be obtained or if the
> +requested size does not match the supported ones, huge page support will be
> +disabled.
> +@end deftp
> +
> @node Dynamic Linking Tunables
> @section Dynamic Linking Tunables
> @cindex dynamic linking tunables
> diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
> index 262bcdbeb8..e5f5c1ec98 100644
> --- a/sysdeps/generic/malloc-hugepages.c
> +++ b/sysdeps/generic/malloc-hugepages.c
> @@ -29,3 +29,9 @@ __malloc_thp_mode (void)
> {
> return malloc_thp_mode_not_supported;
> }
> +
> +/* Generic fallback: always return a zero pagesize and no extra flags,
> + which disables huge page usage regardless of REQUESTED. */
> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
> +{
> + return (struct malloc_hugepage_config_t) { 0, 0 };
> +}
> diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
> index 664cda9b67..27f7adfea5 100644
> --- a/sysdeps/generic/malloc-hugepages.h
> +++ b/sysdeps/generic/malloc-hugepages.h
> @@ -34,4 +34,16 @@ enum malloc_thp_mode_t
>
> enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
>
> +struct malloc_hugepage_config_t
> +{
> + size_t pagesize;
> + int flags;
> +};
> +
> +/* Return the supported huge page size for the REQUESTED size along
> + with the required extra mmap flags. Returning a 0 value for pagesize
> + disables its usage. */
> +struct malloc_hugepage_config_t __malloc_hugepage_config (size_t requested)
> + attribute_hidden;
> +
> #endif /* _MALLOC_HUGEPAGES_H */
> diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> index 66589127cd..0eb0c764ad 100644
> --- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
> +++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
> @@ -17,8 +17,10 @@
> not, see <https://www.gnu.org/licenses/>. */
>
> #include <intprops.h>
> +#include <dirent.h>
> #include <malloc-hugepages.h>
> #include <not-cancel.h>
> +#include <sys/mman.h>
>
> size_t
> __malloc_default_thp_pagesize (void)
> @@ -74,3 +76,126 @@ __malloc_thp_mode (void)
> }
> return malloc_thp_mode_not_supported;
> }
> +
> +static size_t
> +malloc_default_hugepage_size (void)
> +{
> + int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
> + if (fd == -1)
> + return 0;
> +
> + char buf[512];
> + off64_t off = 0;
> + while (1)
> + {
> + ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
> + if (r < 0)
> + break;
> + buf[r - 1] = '\0';
> +
> + const char *s = strstr (buf, "Hugepagesize:");
> + if (s == NULL)
> + {
> + char *nl = strrchr (buf, '\n');
> + if (nl == NULL)
> + break;
> + off += (nl + 1) - buf;
> + continue;
> + }
> +
> + /* The default huge page size is in the form:
> + Hugepagesize: NUMBER kB */
> + size_t hpsize = 0;
> + s += sizeof ("Hugepagesize: ") - 1;
> + for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
> + {
> + if (s[i] == ' ')
> + continue;
> + hpsize *= 10;
> + hpsize += s[i] - '0';
> + }
> + return hpsize * 1024;
> + }
> +
> + __close_nocancel (fd);
> +
> + return 0;
> +}
> +
> +static inline struct malloc_hugepage_config_t
> +make_malloc_hugepage_config (size_t pagesize)
> +{
> + int flags = MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
> + return (struct malloc_hugepage_config_t) { pagesize, flags };
> +}
> +
> +struct malloc_hugepage_config_t
> +__malloc_hugepage_config (size_t requested)
> +{
> + if (requested == 1)
> + {
> + size_t pagesize = malloc_default_hugepage_size ();
> + if (pagesize != 0)
> + return make_malloc_hugepage_config (pagesize);
> + }
> +
> + int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
> + O_RDONLY | O_DIRECTORY, 0);
> + if (dirfd == -1)
> + return (struct malloc_hugepage_config_t) { 0, 0 };
> +
> + bool found = false;
> +
> + char buffer[1024];
> + while (true)
> + {
> +#if !IS_IN(libc)
> +# define __getdents64 getdents64
> +#endif
> + ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
> + if (ret == -1)
> + break;
> + else if (ret == 0)
> + break;
> +
> + char *begin = buffer, *end = buffer + ret;
> + while (begin != end)
> + {
> + unsigned short int d_reclen;
> + memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
> + sizeof (d_reclen));
> + const char *dname = begin + offsetof (struct dirent64, d_name);
> + begin += d_reclen;
> +
> + if (dname[0] == '.'
> + || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
> + continue;
> +
> + /* Each entry represents a supported huge page in the form of:
> + hugepages-<size>kB. */
> + size_t hpsize = 0;
> + const char *sizestr = dname + sizeof ("hugepages-") - 1;
> + for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
> + {
> + hpsize *= 10;
> + hpsize += sizestr[i] - '0';
> + }
> + hpsize *= 1024;
> +
> + if (hpsize == requested)
> + {
> + found = true;
> + break;
> + }
> + }
> + if (found)
> + break;
> + }
> +
> + __close_nocancel (dirfd);
> +
> + if (found)
> + return make_malloc_hugepage_config (requested);
> +
> + return (struct malloc_hugepage_config_t) { 0, 0 };
> +}
Hi Adhemerval,
I tested this patchset on a POWER9, and I'm seeing the following test
failures when running make check with glibc.malloc.mmap_hugetlb=1:
malloc/tst-free-errno
malloc/tst-free-errno-malloc-check
malloc/tst-free-errno-mcheck
posix/tst-exec
posix/tst-exec-static
posix/tst-spawn
posix/tst-spawn-static
posix/tst-spawn5
I'm attaching a summary of the contents of the .out files for each test.
[-- Attachment #2: Summary of failing tests --]
[-- Type: text/plain, Size: 3604 bytes --]
$ failing="malloc/tst-free-errno malloc/tst-free-errno-malloc-check malloc/tst-free-errno-mcheck posix/tst-exec posix/tst-exec-static posix/tst-spawn posix/tst-spawn-static posix/tst-spawn5"
$
$ for t in $failing; do echo "~> $t"; { make test t=$t; GLIBC_TUNABLES="glibc.malloc.mmap_hugetlb=1" make test t=$t; } | grep -Ei "^fail|pass"; cat $t.out; echo; done
~> malloc/tst-free-errno
double free or corruption (out)
PASS: malloc/tst-free-errno
FAIL: malloc/tst-free-errno
Didn't expect signal from child: got `Aborted'
~> malloc/tst-free-errno-malloc-check
PASS: malloc/tst-free-errno-malloc-check
FAIL: malloc/tst-free-errno-malloc-check
error: xmmap.c:28: mmap of 16908288 bytes, prot=0x3, flags=0x32: Device or resource busy
error: 1 test failures
~> malloc/tst-free-errno-mcheck
memory clobbered past end of allocated block
PASS: malloc/tst-free-errno-mcheck
FAIL: malloc/tst-free-errno-mcheck
Didn't expect signal from child: got `Aborted'
~> posix/tst-exec
/home/mscastanho/build/glibc/posix/tst-exec: file 1 (4) is not closed
PASS: posix/tst-exec
FAIL: posix/tst-exec
~> posix/tst-exec-static
/home/mscastanho/build/glibc/posix/tst-exec-static: file 1 (4) is not closed
PASS: posix/tst-exec-static
FAIL: posix/tst-exec-static
~> posix/tst-spawn
PASS: posix/tst-spawn
FAIL: posix/tst-spawn
tst-spawn.c:127: numeric comparison failure
left: 0 (0x0); from: lseek (fd1, 0, SEEK_CUR)
right: -1 (0xffffffffffffffff); from: (off_t) -1
error: 1 test failures
tst-spawn.c:244: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
tst-spawn.c:127: numeric comparison failure
left: 0 (0x0); from: lseek (fd1, 0, SEEK_CUR)
right: -1 (0xffffffffffffffff); from: (off_t) -1
error: 1 test failures
tst-spawn.c:258: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: 2 test failures
~> posix/tst-spawn-static
PASS: posix/tst-spawn-static
FAIL: posix/tst-spawn-static
tst-spawn.c:127: numeric comparison failure
left: 0 (0x0); from: lseek (fd1, 0, SEEK_CUR)
right: -1 (0xffffffffffffffff); from: (off_t) -1
error: 1 test failures
tst-spawn.c:244: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
tst-spawn.c:127: numeric comparison failure
left: 0 (0x0); from: lseek (fd1, 0, SEEK_CUR)
right: -1 (0xffffffffffffffff); from: (off_t) -1
error: 1 test failures
tst-spawn.c:258: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: 2 test failures
~> posix/tst-spawn5
PASS: posix/tst-spawn5
FAIL: posix/tst-spawn5
error: tst-spawn5.c:128: unexpected open file descriptor 54: /proc/meminfo
tst-spawn5.c:182: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: tst-spawn5.c:128: unexpected open file descriptor 54: /proc/meminfo
tst-spawn5.c:182: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: tst-spawn5.c:128: unexpected open file descriptor 5: /proc/meminfo
tst-spawn5.c:182: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: tst-spawn5.c:128: unexpected open file descriptor 4: /proc/meminfo
tst-spawn5.c:182: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: tst-spawn5.c:128: unexpected open file descriptor 6: /proc/meminfo
tst-spawn5.c:182: numeric comparison failure
left: 1 (0x1); from: WEXITSTATUS (status)
right: 0 (0x0); from: 0
error: 5 test failures
[-- Attachment #3: Type: text/plain, Size: 21 bytes --]
--
Matheus Castanho
next prev parent reply other threads:[~2021-08-19 17:58 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-18 14:19 [PATCH v2 0/4] malloc: Improve Huge Page support Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 1/4] malloc: Add madvise support for Transparent Huge Pages Adhemerval Zanella
2021-08-18 18:42 ` Siddhesh Poyarekar
2021-08-19 12:00 ` Adhemerval Zanella
2021-08-19 12:22 ` Siddhesh Poyarekar
2021-08-18 14:19 ` [PATCH v2 2/4] malloc: Add THP/madvise support for sbrk Adhemerval Zanella
2021-08-18 14:19 ` [PATCH v2 3/4] malloc: Move mmap logic to its own function Adhemerval Zanella
2021-08-19 0:47 ` Siddhesh Poyarekar
2021-08-18 14:20 ` [PATCH v2 4/4] malloc: Add Huge Page support for sysmalloc Adhemerval Zanella
2021-08-19 1:03 ` Siddhesh Poyarekar
2021-08-19 12:08 ` Adhemerval Zanella
2021-08-19 17:58 ` Matheus Castanho [this message]
2021-08-19 18:50 ` Adhemerval Zanella
2021-08-20 12:34 ` Matheus Castanho
2021-08-18 18:11 ` [PATCH v2 0/4] malloc: Improve Huge Page support Siddhesh Poyarekar
2021-08-19 11:26 ` Adhemerval Zanella
2021-08-19 11:48 ` Siddhesh Poyarekar
2021-08-19 12:04 ` Adhemerval Zanella
2021-08-19 12:26 ` Siddhesh Poyarekar
2021-08-19 12:42 ` Adhemerval Zanella
2021-08-19 16:42 ` Guillaume Morin
2021-08-19 16:55 ` Adhemerval Zanella
2021-08-19 17:17 ` Guillaume Morin
2021-08-19 17:27 ` Adhemerval Zanella
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=871r6pjp03.fsf@linux.ibm.com \
--to=msc@linux.ibm.com \
--cc=adhemerval.zanella@linaro.org \
--cc=guillaume@morinfr.org \
--cc=libc-alpha@sourceware.org \
--cc=nmanthey@conp-solutions.com \
--cc=siddhesh@sourceware.org \
--cc=tuliom@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).