// Creates NOOF_THREADS idle threads and reports the resident set size of
// the process, optionally nudging the address space first so that the
// thread stacks end on huge page boundaries.
//
// Compile & run:
//    gcc -Wall -g -o tststackalloc tststackalloc.c -lpthread
//    ./tststackalloc -m    # Attempt to use huge pages for stacks -> RSS bloat
//    ./tststackalloc       # Do not attempt to use huge pages -> No RSS bloat
//
// Options:
//    -s stacksize   thread stack size in bytes (default STACK_SIZE_TOTAL)
//    -g guardsize   guard size in bytes (default GUARD_SIZE)
//    -m             align stacks on huge page boundaries

// MAP_ANONYMOUS, MAP_NORESERVE, O_CLOEXEC and getopt are not declared by
// the system headers when compiling with a strict -std=cXX unless a
// feature test macro is defined before the first include.
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <unistd.h>

// Number of threads to create
#define NOOF_THREADS (128)

// Size of a small page (fallback when sysconf cannot tell)
#define SMALL_PAGE_SIZE (4*1024)
static size_t small_page_size = 0;

// Size of a huge page (fallback when sysfs cannot tell)
#define HUGE_PAGE_SIZE (2*1024*1024)
static size_t huge_page_size = 0;

// Total size of the thread stack, including the guard page(s)
#define STACK_SIZE_TOTAL (HUGE_PAGE_SIZE)

// Size of the guard page(s)
#define GUARD_SIZE (SMALL_PAGE_SIZE)

// When enabled (set to non-zero), tries to align thread stacks on
// huge page boundaries, making them eligible for huge pages
static int huge_page_align_stacks;

// Set to non-zero by main to ask the worker threads to return.
static volatile int exit_thread = 0;

// Read the transparent huge page size, in bytes, from sysfs.
//
// Returns the parsed value, or 0 when the file cannot be opened or read,
// or when it contains anything other than decimal digits before the first
// newline.  0 is the "unknown" value the caller tests for.
unsigned long int
default_thp_pagesize (void)
{
  int fd = open ("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
                 O_RDONLY);
  if (fd == -1)
    return 0;

  char str[64];
  ssize_t s = read (fd, str, sizeof (str));
  close (fd);
  if (s < 0)
    return 0;

  unsigned long int r = 0;
  for (ssize_t i = 0; i < s; i++)
    {
      if (str[i] == '\n')
        break;
      if (!isdigit ((unsigned char) str[i]))
        return 0;
      r = r * 10 + (unsigned long int) (str[i] - '0');
    }
  return r;
}

// Thread body: sleep in one second steps until exit_thread is set.
static void *
start (void *arg)
{
  (void) arg;
  while (!exit_thread)
    sleep (1);
  return NULL;
}

// Return a pointer to the next line of FD's contents, using
// [BUFFER, BUFFER_END) as the read buffer.
//
// *CP is the start of the unconsumed data and *RE is the end of the valid
// data; both are updated.  The returned line is NOT NUL-terminated: it
// runs up to the '\n' that precedes the new *CP (or up to *RE when the
// data ends without a newline).  Lines that do not fit in the buffer are
// truncated.  Returns NULL when no data is left or when read fails.
static char *
next_line (int fd, char *const buffer, char **cp, char **re,
           char *const buffer_end)
{
  char *res = *cp;
  char *nl = memchr (*cp, '\n', *re - *cp);
  if (nl == NULL)
    {
      if (*cp != buffer)
        {
          if (*re == buffer_end)
            {
              // The buffer is full: move the partial line to the front
              // and refill the space behind it.
              memmove (buffer, *cp, *re - *cp);
              *re = buffer + (*re - *cp);
              *cp = buffer;

              ssize_t n = read (fd, *re, buffer_end - *re);
              if (n < 0)
                return NULL;

              *re += n;

              nl = memchr (*cp, '\n', *re - *cp);
              while (nl == NULL && *re == buffer_end)
                {
                  /* Truncate too long lines.  */
                  *re = buffer + 3 * (buffer_end - buffer) / 4;
                  n = read (fd, *re, buffer_end - *re);
                  if (n < 0)
                    return NULL;

                  nl = memchr (*re, '\n', n);
                  **re = '\n';
                  *re += n;
                }
            }
          else
            nl = memchr (*cp, '\n', *re - *cp);

          res = *cp;
        }

      // No newline in what is left: treat the remainder as the last line.
      if (nl == NULL)
        nl = *re - 1;
    }

  *cp = nl + 1;
  assert (*cp <= *re);

  return res == *re ? NULL : res;
}

// Open FNAME and call CLOSURE (line, ARG) once for every line in it.
// Exits with an error message when the file cannot be opened.
static void
read_proc_file (const char *fname, void (*closure)(const char *, size_t *),
                size_t *arg)
{
  int fd = open (fname, O_RDONLY | O_CLOEXEC);
  if (fd == -1)
    {
      fprintf (stderr, "error: cannot open %s: %s\n", fname,
               strerror (errno));
      exit (EXIT_FAILURE);
    }

  enum { buffer_size = 1024 };
  // The lines handed to CLOSURE are not NUL-terminated, yet the closures
  // use string functions on them.  Zero the buffer and keep one extra NUL
  // byte after the end so those functions can never run off the array.
  char buffer[buffer_size + 1] = { 0 };
  char *buffer_end = buffer + buffer_size;
  // Start with an "empty but full" buffer so the first call reads data.
  char *cp = buffer_end;
  char *re = buffer_end;

  char *l;
  while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
    closure (l, arg);

  close (fd);
}

// Store the second numeric field of a /proc/self/statm line in *RET.
static void
parse_statm_line (const char *line, size_t *ret)
{
  long int rss = 0;
  // Keep the call outside assert so it still runs with NDEBUG defined.
  int matched = sscanf (line, "%*d %ld", &rss);
  assert (matched == 1);
  (void) matched;
  *ret = rss;
}

// Print the RSS reported by /proc/self/statm, in pages, bytes and MB.
static void
parse_statm (void)
{
  size_t rss = 0;
  read_proc_file ("/proc/self/statm", parse_statm_line, &rss);

  fprintf (stderr, "[statm] RSS: %zu pages (%zu bytes = %zu MB)\n", rss,
           rss * small_page_size, rss * small_page_size / 1024 / 1024);
}

// If LINE starts with "Rss:", add its value (taken to be in kB) to
// *TOTAL, in bytes.  Other lines are ignored.
static void
parse_smaps_line (const char *line, size_t *total)
{
  static const char field[] = "Rss:";
  const size_t fieldlen = sizeof (field) - 1;

  if (strncmp (line, field, fieldlen) != 0)
    return;

  // skip spaces
  for (line += fieldlen; isspace ((unsigned char) *line); line++)
    ;

  enum { numberlen = 32 };
  char number[numberlen];
  size_t i;
  for (i = 0; i < (size_t) numberlen - 1
              && isdigit ((unsigned char) line[i]); i++)
    number[i] = line[i];
  number[i] = '\0';

  // Assume kB.
  // strtol only sets errno on failure, so clear it before the call.
  errno = 0;
  long int value = strtol (number, NULL, 10);
  assert (value != LONG_MIN && value != LONG_MAX && errno != ERANGE);

  *total += (size_t) value * 1024;
}

// Print the sum of all "Rss:" entries of /proc/self/smaps.
static void
parse_smaps (void)
{
  size_t rss = 0;
  read_proc_file ("/proc/self/smaps", parse_smaps_line, &rss);

  fprintf (stderr, "[smaps] RSS: %zu bytes = %zu MB\n", rss,
           rss / (1024 * 1024));
}

// Round VALUE down to a multiple of ALIGNMENT, which must be a power of
// two.
static inline uintptr_t
align_down (uintptr_t value, uintptr_t alignment)
{
  return value & ~(alignment - 1);
}

// Do a series of small, single page mmap calls to attempt to set
// everything up so that the next mmap call (glibc allocating the
// stack) returns a 2MB aligned range.  The kernel "expands" vmas from
// higher to lower addresses (subsequent calls return ranges starting
// at lower addresses), so this function keeps calling mmap until an
// ALIGNMENT-aligned address is returned.  The next range (the stack)
// will then end on that same address.
//
// ALIGNMENT must be a power of two.  The pages mapped here are left
// mapped on purpose.  Exits with an error message if mmap fails, since
// the loop could otherwise never terminate.
static void
align_next_on (uintptr_t alignment)
{
  void *p;
  do
    {
      p = mmap (NULL, small_page_size, PROT_NONE,
                MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
      if (p == MAP_FAILED)
        {
          fprintf (stderr, "error: mmap: %s\n", strerror (errno));
          exit (EXIT_FAILURE);
        }
    }
  while ((uintptr_t) p != align_down ((uintptr_t) p, alignment));
}

// Parse VALUE as a non-negative decimal number that fits in size_t.
// Prints an error and exits when VALUE is empty, negative, out of range
// or followed by anything other than digits.
static size_t
parse_size_t (const char *value)
{
  char *endptr;
  errno = 0;
  unsigned long long int n = strtoull (value, &endptr, 10);
  // strtoull accepts a leading '-' and wraps the result, so reject the
  // sign explicitly; once the whole string is known to be consumed a '-'
  // can only be that sign.
  if (errno != 0 || endptr == value || *endptr != '\0'
      || strchr (value, '-') != NULL || n > SIZE_MAX)
    {
      fprintf (stderr, "error: invalid %s value\n", value);
      exit (EXIT_FAILURE);
    }
  return (size_t) n;
}

// Print "error: WHAT: <description of ERR>" and exit.
static void
fail_with (const char *what, int err)
{
  fprintf (stderr, "error: %s: %s\n", what, strerror (err));
  exit (EXIT_FAILURE);
}

int
main (int argc, char *argv[])
{
  int opt;
  size_t guard_size = GUARD_SIZE;
  size_t stack_size = STACK_SIZE_TOTAL;

  while ((opt = getopt (argc, argv, "g:s:m")) != -1)
    {
      switch (opt)
        {
        case 'g':
          guard_size = parse_size_t (optarg);
          break;
        case 's':
          stack_size = parse_size_t (optarg);
          break;
        case 'm':
          huge_page_align_stacks = 1;
          break;
        default:
          fprintf (stderr, "Usage: %s [-s stacksize] [-g guardsize] [-m]\n",
                   argv[0]);
          exit (EXIT_FAILURE);
        }
    }

  // Fall back to the hard-coded size when sysfs gives nothing usable;
  // align_down needs a non-zero power of two.
  huge_page_size = default_thp_pagesize ();
  if (huge_page_size == 0 || (huge_page_size & (huge_page_size - 1)) != 0)
    huge_page_size = HUGE_PAGE_SIZE;

  {
    long int sz = sysconf (_SC_PAGESIZE);
    if (sz <= 0)
      small_page_size = SMALL_PAGE_SIZE;
    else
      small_page_size = (size_t) sz;
  }

  pthread_t t[NOOF_THREADS];
  pthread_attr_t attr;
  int err;

  // NOTE(review): presumably forces malloc to set itself up before the
  // mmap layout is manipulated below -- confirm.
  void *dummy = malloc (1024);
  free (dummy);

  fprintf (stderr, "Page size: %zu kB, %zu MB huge pages\n",
           small_page_size / 1024, huge_page_size / (1024 * 1024));
  fprintf (stderr, "Stack size: %zu kB, guard size: %zu kB\n",
           stack_size / 1024, guard_size / 1024);
  if (huge_page_align_stacks)
    {
      fprintf (stderr,
               "Will attempt to align allocations to make stacks eligible for huge pages\n");
    }

  pid_t pid = getpid ();
  fprintf (stderr, "pid: %d (/proc/%d/smaps)\n", (int) pid, (int) pid);

  err = pthread_attr_init (&attr);
  if (err != 0)
    fail_with ("pthread_attr_init", err);
  err = pthread_attr_setstacksize (&attr, stack_size);
  if (err != 0)
    fail_with ("pthread_attr_setstacksize", err);
  err = pthread_attr_setguardsize (&attr, guard_size);
  if (err != 0)
    fail_with ("pthread_attr_setguardsize", err);

  fprintf (stderr, "Creating %d threads...\n", NOOF_THREADS);
  // Only threads that were actually created may be joined later.
  int created = 0;
  for (int i = 0; i < NOOF_THREADS; i++)
    {
      if (huge_page_align_stacks)
        {
          // align (next) allocation on huge page boundary
          align_next_on (huge_page_size);
        }
      err = pthread_create (&t[i], &attr, start, NULL);
      if (err != 0)
        {
          fprintf (stderr, "error: pthread_create: %s\n", strerror (err));
          break;
        }
      created++;
    }
  pthread_attr_destroy (&attr);

  sleep (1);

  parse_statm ();
  parse_smaps ();

  fprintf (stderr, "Press enter to exit...\n");
  getchar ();

  exit_thread = 1;
  for (int i = 0; i < created; i++)
    pthread_join (t[i], NULL);
  return 0;
}