// Compile & run:
//    gcc -Wall -g -o tststackalloc tststackalloc.c -lpthread
//    ./tststackalloc 1  # Attempt to use huge pages for stacks -> RSS bloat
//    ./tststackalloc 0  # Do not attempt to use huge pages -> No RSS bloat

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/types.h>

// Number of threads to create
#define NOOF_THREADS (128)

// Size of a small page (hard-coded)
#define SMALL_PAGE_SIZE (4*1024)

// Size of a huge page (hard-coded)
#define HUGE_PAGE_SIZE (2*1024*1024)

// Total size of the thread stack, including the guard page(s)
#define STACK_SIZE_TOTAL (HUGE_PAGE_SIZE)

// Size of the guard page(s)
#define GUARD_SIZE (SMALL_PAGE_SIZE)

//#define PRINT_STACK_RANGES
//#define PRINT_PROC_SMAPS

// When enabled (set to non-zero), tries to align thread stacks on
// huge page boundaries, making them eligible for huge pages
static int huge_page_align_stacks;

// Set to non-zero by main() to ask the worker threads to return.
// Atomic because it is written by main() while the workers read it.
static atomic_int exit_thread = 0;

#if defined(PRINT_STACK_RANGES)
// Prints the address range and the size of the calling thread's stack
// to stderr. Terminates the process if the attributes cannot be read.
static void print_stack_range(void) {
  pthread_attr_t attr;
  void* bottom;
  size_t size;
  int err;

  err = pthread_getattr_np(pthread_self(), &attr);
  if (err != 0) {
    fprintf(stderr, "Error looking up attr\n");
    exit(1);
  }

  err = pthread_attr_getstack(&attr, &bottom, &size);
  if (err != 0) {
    fprintf(stderr, "Cannot locate current stack attributes!\n");
    exit(1);
  }

  pthread_attr_destroy(&attr);

  // Arithmetic on void* is a GNU extension; go through char* instead.
  fprintf(stderr, "Stack: %p-%p (0x%zx/%zu)\n",
          bottom, (void*)((char*)bottom + size), size, size);
}
#endif

// Thread entry point: idles (polling once per second) until main()
// raises exit_thread. The argument is unused; always returns NULL.
static void* start(void* arg) {
  (void)arg;

#if defined(PRINT_STACK_RANGES)
  print_stack_range();
#endif

  while (!atomic_load(&exit_thread)) {
    sleep(1);
  }

  return NULL;
}

#if defined(PRINT_PROC_SMAPS)
// Copies /proc/self/<file> to stdout. Prints a diagnostic to stderr and
// returns without copying if the file cannot be opened.
static void print_proc_file(const char* file) {
  char path[128];
  snprintf(path, sizeof(path), "/proc/self/%s", file);

  int fd = open(path, O_RDONLY);
  if (fd < 0) {
    fprintf(stderr, "Cannot open %s: %s\n", path, strerror(errno));
    return;
  }

  char buf[4096];
  ssize_t nread;
  while ((nread = read(fd, buf, sizeof(buf))) > 0) {
    // write() may accept fewer bytes than requested; keep going until
    // the whole chunk has been written.
    ssize_t off = 0;
    while (off < nread) {
      ssize_t nwritten = write(STDOUT_FILENO, buf + off, (size_t)(nread - off));
      if (nwritten < 0) {
        if (errno == EINTR) {
          continue;
        }
        close(fd);
        return;
      }
      off += nwritten;
    }
  }

  close(fd);
}
#endif

// Returns the second field of /proc/self/statm, which main() reports as
// the resident set size in pages. Prints a warning to stderr and
// returns 0 if the file cannot be opened or parsed.
static size_t get_rss(void) {
  FILE* statm = fopen("/proc/self/statm", "r");
  if (statm == NULL) {
    fprintf(stderr, "Cannot open /proc/self/statm: %s\n", strerror(errno));
    return 0;
  }

  long rss = 0;
  // Skip the first field, read the second.
  int matched = fscanf(statm, "%*d %ld", &rss);
  fclose(statm);

  if (matched != 1 || rss < 0) {
    fprintf(stderr, "Cannot parse /proc/self/statm\n");
    return 0;
  }

  return (size_t)rss;
}

// Rounds value down to the nearest multiple of alignment.
// alignment must be a power of two; the bit mask is wrong otherwise.
uintptr_t align_down(uintptr_t value, uintptr_t alignment) {
  return value & ~(alignment - 1);
}

// Do a series of small, single page mmap calls to attempt to set
// everything up so that the next mmap call (glibc allocating the
// stack) returns an aligned range. The kernel "expands" vmas from
// higher to lower addresses (subsequent calls return ranges starting
// at lower addresses), so this function keeps calling mmap until an
// address aligned on `alignment` is returned. The next range (the
// stack) will then end on that same address.
//
// alignment must be a power of two. The PROT_NONE pages mapped here are
// never unmapped. Terminates the process if mmap fails, since
// MAP_FAILED is never aligned and the loop would otherwise not end.
static void align_next_on(uintptr_t alignment) {
  uintptr_t p;
  do {
    void* page = mmap(NULL, SMALL_PAGE_SIZE, PROT_NONE,
                      MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
    if (page == MAP_FAILED) {
      fprintf(stderr, "mmap failed: %s\n", strerror(errno));
      exit(1);
    }
    p = (uintptr_t)page;
  } while (p != align_down(p, alignment));
}

// Prints the command line synopsis to stdout and exits with status 1.
static void usage(const char* prog) {
  printf("Usage: %s <huge page stacks>\n", prog);
  printf(" huge page stacks = 1 - attempt to use huge pages for stacks\n");
  exit(1);
}

// Terminates the process with a diagnostic if a pthread_* call
// returned a non-zero error number.
static void check_pthread(int err, const char* what) {
  if (err != 0) {
    fprintf(stderr, "%s failed: %s\n", what, strerror(err));
    exit(1);
  }
}

// Creates NOOF_THREADS idle threads with STACK_SIZE_TOTAL stacks,
// optionally steering each stack onto a huge page boundary, then
// reports the process RSS and waits for Enter before shutting down.
//
// argv[1]: non-zero to attempt huge page aligned stacks, 0 otherwise.
int main(int argc, char* argv[]) {
  pthread_t t[NOOF_THREADS];
  pthread_attr_t attr;
  int i;

  if (argc != 2) {
    usage(argv[0]);
  }

  // strtol instead of atoi so that non-numeric input is rejected
  // rather than silently treated as 0.
  errno = 0;
  char* end = NULL;
  long flag = strtol(argv[1], &end, 10);
  if (end == argv[1] || *end != '\0' || errno == ERANGE) {
    usage(argv[0]);
  }
  huge_page_align_stacks = (flag != 0);

  // NOTE(review): presumably here so the allocator sets up its heap
  // before the mmap sequence below - confirm.
  void* dummy = malloc(1024);
  free(dummy);

  fprintf(stderr, "Page size: %d kB, %d MB huge pages\n",
          SMALL_PAGE_SIZE / 1024, HUGE_PAGE_SIZE / (1024 * 1024));
  if (huge_page_align_stacks) {
    fprintf(stderr, "Will attempt to align allocations to make stacks eligible for huge pages\n");
  }

  pid_t pid = getpid();
  fprintf(stderr, "pid: %d (/proc/%d/smaps)\n", (int)pid, (int)pid);

  size_t guard_size = GUARD_SIZE;
  size_t stack_size = STACK_SIZE_TOTAL;
  check_pthread(pthread_attr_init(&attr), "pthread_attr_init");
  check_pthread(pthread_attr_setstacksize(&attr, stack_size),
                "pthread_attr_setstacksize");
  check_pthread(pthread_attr_setguardsize(&attr, guard_size),
                "pthread_attr_setguardsize");

  fprintf(stderr, "Creating %d threads...\n", NOOF_THREADS);
  for (i = 0; i < NOOF_THREADS; i++) {
    if (huge_page_align_stacks) {
      // align (next) allocation on huge page boundary
      align_next_on(HUGE_PAGE_SIZE);
    }
    // A failed create would leave t[i] uninitialised for the join
    // below, so treat it as fatal.
    check_pthread(pthread_create(&t[i], &attr, start, NULL), "pthread_create");
  }
  pthread_attr_destroy(&attr);

  sleep(1);

#if defined(PRINT_PROC_SMAPS)
  print_proc_file("smaps");
#endif

  size_t rss = get_rss();
  fprintf(stderr, "RSS: %zu pages (%zu bytes = %zu MB)\n",
          rss, rss * SMALL_PAGE_SIZE, rss * SMALL_PAGE_SIZE / 1024 / 1024);

  fprintf(stderr, "Press enter to exit...\n");
  getchar();

  atomic_store(&exit_thread, 1);
  for (i = 0; i < NOOF_THREADS; i++) {
    pthread_join(t[i], NULL);
  }

  return 0;
}