From 9ea451aef0f1f2fb0a36a7b718f910cfe285541d Mon Sep 17 00:00:00 2001 From: Dominik Vogt Date: Fri, 29 Apr 2016 08:36:59 +0100 Subject: [PATCH] Drop excess size used for run time allocated stack variables. The present calculation sometimes led to more stack memory being used than necessary with alloca. First, (STACK_BOUNDARY - 1) would be added to the allocated size: size = plus_constant (Pmode, size, extra); size = force_operand (size, NULL_RTX); Then round_push was called and added another (STACK_BOUNDARY - 1) before rounding down to a multiple of STACK_BOUNDARY. On s390x this resulted in adding 14 before rounding down for "x" in the test case pr36728-1.c. round_push() now takes an argument to inform it about what has already been added to size. --- gcc/explow.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/gcc/explow.c b/gcc/explow.c index e0ce201..a039295 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -949,24 +949,30 @@ anti_adjust_stack (rtx adjust) } /* Round the size of a block to be pushed up to the boundary required - by this machine. SIZE is the desired size, which need not be constant. */ + by this machine. SIZE is the desired size, which need not be constant. + ALREADY_ADDED is the number of units that have already been added to SIZE for + other alignment reasons. +*/ static rtx -round_push (rtx size) +round_push (rtx size, int already_added) { - rtx align_rtx, alignm1_rtx; + rtx align_rtx, add_rtx; if (!SUPPORTS_STACK_ALIGNMENT || crtl->preferred_stack_boundary == MAX_SUPPORTED_STACK_ALIGNMENT) { int align = crtl->preferred_stack_boundary / BITS_PER_UNIT; + int add; if (align == 1) return size; + add = (align > already_added) ? 
align - already_added - 1 : 0; + if (CONST_INT_P (size)) { - HOST_WIDE_INT new_size = (INTVAL (size) + align - 1) / align * align; + HOST_WIDE_INT new_size = (INTVAL (size) + add) / align * align; if (INTVAL (size) != new_size) size = GEN_INT (new_size); @@ -974,7 +980,7 @@ round_push (rtx size) } align_rtx = GEN_INT (align); - alignm1_rtx = GEN_INT (align - 1); + add_rtx = (add > 0) ? GEN_INT (add) : const0_rtx; } else { @@ -983,15 +989,15 @@ round_push (rtx size) substituted by the right value in vregs pass and optimized during combine. */ align_rtx = virtual_preferred_stack_boundary_rtx; - alignm1_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), - NULL_RTX); + add_rtx = force_operand (plus_constant (Pmode, align_rtx, -1), NULL_RTX); } /* CEIL_DIV_EXPR needs to worry about the addition overflowing, but we know it can't. So add ourselves and then do TRUNC_DIV_EXPR. */ - size = expand_binop (Pmode, add_optab, size, alignm1_rtx, - NULL_RTX, 1, OPTAB_LIB_WIDEN); + if (add_rtx != const0_rtx) + size = expand_binop (Pmode, add_optab, size, add_rtx, + NULL_RTX, 1, OPTAB_LIB_WIDEN); size = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, size, align_rtx, NULL_RTX, 1); size = expand_mult (Pmode, size, align_rtx, NULL_RTX, 1); @@ -1175,6 +1181,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, rtx_code_label *final_label; rtx final_target, target; unsigned extra_align = 0; + unsigned extra = 0; bool must_align; /* If we're asking for zero bytes, it doesn't matter what we point @@ -1275,9 +1282,9 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, extra_align = BITS_PER_UNIT; #endif - if (must_align) + if (must_align && required_align > extra_align) { - unsigned extra = (required_align - extra_align) / BITS_PER_UNIT; + extra = (required_align - extra_align) / BITS_PER_UNIT; size = plus_constant (Pmode, size, extra); size = force_operand (size, NULL_RTX); @@ -1285,7 +1292,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, if 
(flag_stack_usage_info) stack_usage_size += extra; - if (extra && size_align > extra_align) + if (size_align > extra_align) size_align = extra_align; } @@ -1304,7 +1311,7 @@ allocate_dynamic_stack_space (rtx size, unsigned size_align, momentarily mis-aligning the stack. */ if (size_align % MAX_SUPPORTED_STACK_ALIGNMENT != 0) { - size = round_push (size); + size = round_push (size, extra); if (flag_stack_usage_info) { -- 2.3.0