Index: testsuite/gcc.c-torture/execute/pr16660-1.c =================================================================== --- testsuite/gcc.c-torture/execute/pr16660-1.c (revision 0) +++ testsuite/gcc.c-torture/execute/pr16660-1.c (revision 0) @@ -0,0 +1,11 @@ +typedef __SIZE_TYPE__ size_t; +#define ALIGNMENT 256 +int main(void) +{ + int a[ALIGNMENT/sizeof(int)] __attribute__((aligned(ALIGNMENT))); + size_t b = (size_t)&a; + if (b&(ALIGNMENT-1)) + return 1; + return 0; +} + Index: testsuite/gcc.c-torture/execute/pr16660-2.c =================================================================== --- testsuite/gcc.c-torture/execute/pr16660-2.c (revision 0) +++ testsuite/gcc.c-torture/execute/pr16660-2.c (revision 0) @@ -0,0 +1,19 @@ +typedef __SIZE_TYPE__ size_t; +#define ALIGNMENT 256 +int main(void) +{ + { + int a[ALIGNMENT/sizeof(int)] __attribute__((aligned(ALIGNMENT))); + size_t b = (size_t)&a; + if (b&(ALIGNMENT-1)) + return 1; + } + { + int a1[ALIGNMENT/sizeof(int)] __attribute__((aligned(ALIGNMENT))); + size_t b = (size_t)&a1; + if (b&(ALIGNMENT-1)) + return 1; + } + return 0; +} + Index: testsuite/gcc.dg/pr16660-1.c =================================================================== --- testsuite/gcc.dg/pr16660-1.c (revision 0) +++ testsuite/gcc.dg/pr16660-1.c (revision 0) @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -fdump-rtl-expand" } */ +int g(int *); +int f(void) +{ + int i __attribute__((aligned(1) )); + int *a = &i; + g(a); + return *a; +} +/* Even though the user supplied an alignment of 1, the memory + location for i should have the natural alignment of the stack. + Test only on x86, spu and powerpc for now. */ + +/* { dg-final { scan-rtl-dump-not "2 i\\\+0 S4 A8" "expand" } } */ +/* { dg-final { scan-rtl-dump "2 i\\\+0 S4 A32" "expand" { target i?86-*-* x86_64-*-* } } } */ +/* { dg-final { scan-rtl-dump "2 i\\\+0 S4 A64" "expand" { target { { rs6000-*-* powerpc*-*-* } && ilp32 } } } } */ +/* { dg-final { scan-rtl-dump "2 i\\\+0 S4 A128" "expand" { target { spu-*-* } || { { rs6000-*-* powerpc*-*-* } && lp64 } } } } */ +/* { dg-final { cleanup-rtl-dump "expand" } } */ Index: expr.h =================================================================== --- expr.h (revision 124657) +++ expr.h (working copy) @@ -712,6 +712,7 @@ extern void update_nonlocal_goto_save_ar /* Allocate some space on the stack dynamically and return its address. An rtx says how many bytes. */ extern rtx allocate_dynamic_stack_space (rtx, rtx, int); +extern rtx allocate_dynamic_stack_space_1 (rtx, rtx, int, int); /* Probe a range of stack addresses from FIRST to FIRST+SIZE, inclusive. FIRST is a constant and size is a Pmode RTX. These are offsets from the Index: function.c =================================================================== --- function.c (revision 124657) +++ function.c (working copy) @@ -813,6 +813,7 @@ assign_temp (tree type_or_decl, int keep if (mode == BLKmode || memory_required) { HOST_WIDE_INT size = int_size_in_bytes (type); + unsigned HOST_WIDE_INT align; rtx tmp; /* Zero sized arrays are GNU C extension. Set size to 1 to avoid @@ -837,7 +838,37 @@ assign_temp (tree type_or_decl, int keep size = 1; } - tmp = assign_stack_temp_for_type (mode, size, keep, type); + align = TYPE_ALIGN (type); + if (decl && DECL_ALIGN (decl) > align) + align = DECL_ALIGN (decl); + + /* When the user specifies an alignment larger than + BIGGEST_ALIGNMENT we need to allocate extra space so we can + adjust this decl's address. */ + if (align > PREFERRED_STACK_BOUNDARY) + { + rtx addr; + HOST_WIDE_INT pad = (align - PREFERRED_STACK_BOUNDARY) / BITS_PER_UNIT; + size += pad; + tmp = assign_stack_temp_for_type (mode, size, keep, type); + addr = XEXP (tmp, 0); + /* CEIL_DIV_EXPR needs to worry about the addition overflowing, + but we know it can't. So add ourselves and then do + TRUNC_DIV_EXPR. */ + addr = expand_binop (Pmode, add_optab, addr, + GEN_INT (align / BITS_PER_UNIT - 1), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + addr = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, addr, + GEN_INT (align / BITS_PER_UNIT), + NULL_RTX, 1); + addr = expand_mult (Pmode, addr, + GEN_INT (align / BITS_PER_UNIT), + NULL_RTX, 1); + tmp = change_address (tmp, VOIDmode, addr); + } + else + tmp = assign_stack_temp_for_type (mode, size, keep, type); + return tmp; } Index: cfgexpand.c =================================================================== --- cfgexpand.c (revision 124657) +++ cfgexpand.c (working copy) @@ -41,6 +41,7 @@ Boston, MA 02110-1301, USA. */ #include "params.h" #include "tree-inline.h" #include "value-prof.h" +#include "optabs.h" /* Verify that there is exactly single jump instruction since last and attach REG_BR_PROB note specifying probability. @@ -151,8 +152,7 @@ static bool has_protected_decls; smaller than our cutoff threshold. Used for -Wstack-protector. */ static bool has_short_buffer; -/* Discover the byte alignment to use for DECL. Ignore alignment - we can't do with expected alignment of the stack boundary. */ +/* Discover the byte alignment to use for DECL. */ static unsigned int get_decl_align_unit (tree decl) @@ -161,8 +161,6 @@ get_decl_align_unit (tree decl) align = DECL_ALIGN (decl); align = LOCAL_ALIGNMENT (TREE_TYPE (decl), align); - if (align > PREFERRED_STACK_BOUNDARY) - align = PREFERRED_STACK_BOUNDARY; if (cfun->stack_alignment_needed < align) cfun->stack_alignment_needed = align; @@ -173,11 +171,21 @@ get_decl_align_unit (tree decl) Return the frame offset. */ static HOST_WIDE_INT -alloc_stack_frame_space (HOST_WIDE_INT size, HOST_WIDE_INT align) +alloc_stack_frame_space (HOST_WIDE_INT size, unsigned HOST_WIDE_INT align) { HOST_WIDE_INT offset, new_frame_offset; new_frame_offset = frame_offset; + + /* Allocate extra space if the alignment required is greater than the + stack boundary and then assume the RTL expansion of the variable, gets + the correct alignment. */ + if (align > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) + { + size += align; + align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT; + } + if (FRAME_GROWS_DOWNWARD) { new_frame_offset -= size + frame_phase; @@ -523,23 +531,38 @@ dump_stack_var_partition (void) static void expand_one_stack_var_at (tree decl, HOST_WIDE_INT offset) { - HOST_WIDE_INT align; + unsigned HOST_WIDE_INT align; rtx x; /* If this fails, we've overflowed the stack frame. Error nicely? */ gcc_assert (offset == trunc_int_for_mode (offset, Pmode)); x = plus_constant (virtual_stack_vars_rtx, offset); - x = gen_rtx_MEM (DECL_MODE (decl), x); + align = get_decl_align_unit (decl); - /* Set alignment we actually gave this decl. */ - offset -= frame_phase; - align = offset & -offset; - align *= BITS_PER_UNIT; - if (align > STACK_BOUNDARY || align == 0) - align = STACK_BOUNDARY; - DECL_ALIGN (decl) = align; - DECL_USER_ALIGN (decl) = 0; + if (align > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT) + { + rtx addr = x; + addr = expand_binop (Pmode, add_optab, addr, + GEN_INT (align - 1), + NULL_RTX, 1, OPTAB_LIB_WIDEN); + addr = expand_and (Pmode, addr, GEN_INT (-align), + NULL_RTX); + x = addr; + } + else + { + /* Set alignment we actually gave this decl. */ + offset -= frame_phase; + align = offset & -offset; + align *= BITS_PER_UNIT; + if (align > STACK_BOUNDARY || align == 0) + align = STACK_BOUNDARY; + DECL_ALIGN (decl) = align; + DECL_USER_ALIGN (decl) = 0; + } + + x = gen_rtx_MEM (DECL_MODE (decl), x); set_mem_attributes (x, decl, true); SET_DECL_RTL (decl, x); Index: explow.c =================================================================== --- explow.c (revision 124657) +++ explow.c (working copy) @@ -1093,16 +1093,28 @@ update_nonlocal_goto_save_area (void) TARGET is a place in which the address can be placed. KNOWN_ALIGN is the alignment (in bits) that we know SIZE has. */ - rtx allocate_dynamic_stack_space (rtx size, rtx target, int known_align) { + return allocate_dynamic_stack_space_1 (size, target, known_align, + BIGGEST_ALIGNMENT); +} + +/* Like allocate_dynamic_stack_space expect provide a way, REQUIRED_ALGIN, + to say the required alignment of the address. */ +rtx +allocate_dynamic_stack_space_1 (rtx size, rtx target, int known_align, + int required_align) +{ /* If we're asking for zero bytes, it doesn't matter what we point to since we can't dereference it. But return a reasonable address anyway. */ if (size == const0_rtx) return virtual_stack_dynamic_rtx; + if (required_align < BIGGEST_ALIGNMENT) + required_align = BIGGEST_ALIGNMENT; + /* Otherwise, show we're calling alloca or equivalent. */ current_function_calls_alloca = 1; @@ -1131,13 +1143,13 @@ allocate_dynamic_stack_space (rtx size, #if defined (STACK_DYNAMIC_OFFSET) || defined (STACK_POINTER_OFFSET) #define MUST_ALIGN 1 #else -#define MUST_ALIGN (PREFERRED_STACK_BOUNDARY < BIGGEST_ALIGNMENT) +#define MUST_ALIGN (PREFERRED_STACK_BOUNDARY < required_align) #endif if (MUST_ALIGN) size = force_operand (plus_constant (size, - BIGGEST_ALIGNMENT / BITS_PER_UNIT - 1), + required_align / BITS_PER_UNIT - 1), NULL_RTX); #ifdef SETJMP_VIA_SAVE_AREA @@ -1293,18 +1305,12 @@ allocate_dynamic_stack_space (rtx size, if (MUST_ALIGN) { - /* CEIL_DIV_EXPR needs to worry about the addition overflowing, - but we know it can't. So add ourselves and then do - TRUNC_DIV_EXPR. */ target = expand_binop (Pmode, add_optab, target, - GEN_INT (BIGGEST_ALIGNMENT / BITS_PER_UNIT - 1), + GEN_INT (required_align / BITS_PER_UNIT - 1), NULL_RTX, 1, OPTAB_LIB_WIDEN); - target = expand_divmod (0, TRUNC_DIV_EXPR, Pmode, target, - GEN_INT (BIGGEST_ALIGNMENT / BITS_PER_UNIT), - NULL_RTX, 1); - target = expand_mult (Pmode, target, - GEN_INT (BIGGEST_ALIGNMENT / BITS_PER_UNIT), - NULL_RTX, 1); + target = expand_and (Pmode, target, + GEN_INT (-required_align / BITS_PER_UNIT), + NULL_RTX); } /* Record the new stack level for nonlocal gotos. */ Index: Makefile.in =================================================================== --- Makefile.in (revision 124657) +++ Makefile.in (working copy) @@ -2553,7 +2553,7 @@ cfgexpand.o : cfgexpand.c $(TREE_FLOW_H) $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(FUNCTION_H) $(TIMEVAR_H) $(TM_H) \ coretypes.h $(TREE_DUMP_H) except.h langhooks.h tree-pass.h $(RTL_H) \ $(DIAGNOSTIC_H) toplev.h $(BASIC_BLOCK_H) $(FLAGS_H) debug.h $(PARAMS_H) \ - value-prof.h + value-prof.h $(OPTABS_H) cfgrtl.o : cfgrtl.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(FLAGS_H) insn-config.h $(BASIC_BLOCK_H) $(REGS_H) hard-reg-set.h \ output.h toplev.h $(FUNCTION_H) except.h $(TM_P_H) insn-config.h $(EXPR_H) \ Index: config/i386/i386.c =================================================================== --- config/i386/i386.c (revision 124657) +++ config/i386/i386.c (working copy) @@ -5610,9 +5610,6 @@ ix86_compute_frame_layout (struct ix86_f gcc_assert (!size || stack_alignment_needed); gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); - gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); - gcc_assert (stack_alignment_needed - <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT); if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT) stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT; Index: stmt.c =================================================================== --- stmt.c (revision 124657) +++ stmt.c (working copy) @@ -1850,6 +1850,7 @@ void expand_decl (tree decl) { tree type; + unsigned HOST_WIDE_INT align; type = TREE_TYPE (decl); @@ -1874,6 +1875,13 @@ expand_decl (tree decl) if (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) return; + if (DECL_USER_ALIGN (decl)) + align = DECL_ALIGN (decl); + else if (DECL_MODE (decl) == BLKmode) + align = BIGGEST_ALIGNMENT; + else + align = TYPE_ALIGN (type); + /* Create the RTL representation for the variable. */ if (type == error_mark_node) @@ -1941,10 +1949,16 @@ expand_decl (tree decl) oldaddr = XEXP (DECL_RTL (decl), 0); } - /* Set alignment we actually gave this decl. */ - DECL_ALIGN (decl) = (DECL_MODE (decl) == BLKmode ? BIGGEST_ALIGNMENT - : GET_MODE_BITSIZE (DECL_MODE (decl))); - DECL_USER_ALIGN (decl) = 0; + /* Set alignment we actually gave this decl if we don't have + an user specific alignment and the alignment is less than + the biggest alignment. */ + if (! DECL_USER_ALIGN (decl) + || DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT) + { + DECL_ALIGN (decl) = (DECL_MODE (decl) == BLKmode ? BIGGEST_ALIGNMENT + : GET_MODE_BITSIZE (DECL_MODE (decl))); + DECL_USER_ALIGN (decl) = 0; + } x = assign_temp (decl, 1, 1, 1); set_mem_attributes (x, decl, 1); @@ -1975,8 +1989,10 @@ expand_decl (tree decl) DECL_ALIGN says how the variable is to be aligned and we cannot use it to conclude anything about the alignment of the size. */ - address = allocate_dynamic_stack_space (size, NULL_RTX, - TYPE_ALIGN (TREE_TYPE (decl))); + address = allocate_dynamic_stack_space_1 (size, NULL_RTX, + TYPE_ALIGN (TREE_TYPE (decl)), + MAX (TYPE_ALIGN (TREE_TYPE (decl)), + align)); /* Reference the variable indirect through that rtx. */ x = gen_rtx_MEM (DECL_MODE (decl), address);