From f4d252eb2c860450b3738591fca36f568c958232 Mon Sep 17 00:00:00 2001 From: Dominik Vogt Date: Fri, 29 Apr 2016 08:36:59 +0100 Subject: [PATCH] Drop excess size used for run time allocated stack variables. The present calculation sometimes led to more stack memory being used than necessary with alloca. First, (STACK_BOUNDARY -1) would be added to the allocated size: size = plus_constant (Pmode, size, extra); size = force_operand (size, NULL_RTX); Then round_push was called and added another (STACK_BOUNDARY - 1) before rounding down to a multiple of STACK_BOUNDARY. On s390x this resulted in adding 14 before rounding down for "x" in the test case pr36728-1.c. round_push() now takes an argument to inform it about what has already been added to size. --- gcc/explow.c | 33 ++++++++++++++++----------- gcc/testsuite/gcc.dg/pr50938.c | 52 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr50938.c diff --git a/gcc/explow.c b/gcc/explow.c index e0ce201..1858597 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -949,24 +949,30 @@ anti_adjust_stack (rtx adjust) } /* Round the size of a block to be pushed up to the boundary required - by this machine. SIZE is the desired size, which need not be constant. */ + by this machine. SIZE is the desired size, which need not be constant. + ALREADY_ADDED is the number of units that have already been added to SIZE + for other alignment reasons. +*/ static rtx -round_push (rtx size) +round_push (rtx size, int already_added) { - rtx align_rtx, alignm1_rtx; + rtx align_rtx, add_rtx; if (!SUPPORTS_STACK_ALIGNMENT || crtl->preferred_stack_boundary == MAX_SUPPORTED_STACK_ALIGNMENT) { int align = crtl->preferred_stack_boundary / BITS_PER_UNIT; + int add; if (align == 1) return size; + add = (align > already_added) ? align - already_added - 1 : 0; + if (CONST_INT_P (size)) { - HOST_WIDE_INT new_size = (INTVAL (size) + align - 1) / align * align; + HOST_WIDE_INT new_size = (INTVAL (size) + add) / align * align; if (INTVAL (size) != new_size) size = GEN_INT (new_size); @@ -974,7 +980,7 @@ round_push (rtx size) } align_rtx = GEN_INT (align); - alignm1_rtx = GEN_INT (align - 1); + add_rtx = (add > 0) ? GEN_INT (add) : const0_rtx; } else { @@ -983,15 +989,15 @@ round_push (rtx size) substituted by the right value in vregs pass and optimized during combine. */ align_rtx = virtual_preferred_stack_boundary_rtx; - alignm1_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), - NULL_RTX); + add_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), NULL_RTX); } /* CEIL_DIV_EXPR needs to worry about the addition overflowing, but we know it can't. So add ourselves and then do TRUNC_DIV_EXPR. */ - size = expand_binop (Pmode, add_optab, size, alignm1_rtx, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (add_rtx != const0_rtx) + size = expand_binop (Pmode, add_optab, size, add_rtx, + NULL_RTX, 1, OPTAB_LIB_WIDEN); size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx, NULL_RTX, 1); size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1); @@ -1175,6 +1181,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, rtx_code_label *final_label; rtx final_target, target; unsigned extra_align = 0; + unsigned extra = 0; bool must_align; /* If we're asking for zero bytes, it doesn't matter what we point @@ -1275,9 +1282,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, extra_align = BITS_PER_UNIT; #endif - if (must_align) + if (must_align && required_align > extra_align) { - unsigned extra = (required_align - extra_align) / BITS_PER_UNIT; + extra = (required_align - extra_align) / BITS_PER_UNIT; size = plus_constant (Pmode, size, extra); size = force_operand (size, NULL_RTX); @@ -1285,7 +1292,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, if (flag_stack_usage_info) stack_usage_size += extra; - if (extra && size_align > extra_align) + if (size_align > extra_align) size_align = extra_align; } @@ -1304,7 +1311,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, momentarily mis-aligning the stack. */ if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0) { - size = round_push (size); + size = round_push (size, extra); if (flag_stack_usage_info) { diff --git a/gcc/testsuite/gcc.dg/pr50938.c b/gcc/testsuite/gcc.dg/pr50938.c new file mode 100644 index 0000000..14b7540 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr50938.c @@ -0,0 +1,52 @@ +/* PR/50938: Check that alloca () reserves the correct amount of stack space. + */ + +/* { dg-do run } */ +/* { dg-options "-O0" } */ +/* { dg-require-effective-target alloca } */ + +__attribute__ ((noinline)) +static void +dummy (void) +{ +} + +__attribute__ ((noinline)) +static char * +get_frame_addr (void *p) +{ + void *faddr = __builtin_frame_address (0); + /* Call a function to make sure that a stack frame exists. */ + dummy (); + return faddr; +} + +__attribute__ ((noinline)) +static void * +stack_var (void) +{ + /* 1024 bytes on the stack. */ + char s[1024]; + /* One stack slot. */ + void *p = (void *)s; + /* Keep stack memory alive by passing it to the function. */ + return get_frame_addr (p); +} + +__attribute__ ((noinline)) +static void * +alloca_var (void) +{ + /* 1024 bytes on the stack plus one stack slot for a. */ + void *a = __builtin_alloca (1024); + return get_frame_addr (a); +} + +int main (void) +{ + if (stack_var () != alloca_var ()) + /* The functions used a different amount of stack space. */ + __builtin_abort (); + + return 0; +} -- 2.3.0