2016-08-03 Nathan Sidwell

	gcc/
	* config/nvptx/nvptx.c (nvptx_declare_function_name): Round frame
	size to DImode boundary.
	(nvptx_propagate): Likewise.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/crash-1.c: New.

Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 239084)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -999,11 +999,14 @@ nvptx_declare_function_name (FILE *file,
     init_frame (file, STACK_POINTER_REGNUM,
 		UNITS_PER_WORD, crtl->outgoing_args_size);
 
-  /* Declare a local variable for the frame. */
+  /* Declare a local variable for the frame. Force its size to be
+     DImode-compatible. */
   HOST_WIDE_INT sz = get_frame_size ();
   if (sz || cfun->machine->has_chain)
     init_frame (file, FRAME_POINTER_REGNUM,
-		crtl->stack_alignment_needed / BITS_PER_UNIT, sz);
+		crtl->stack_alignment_needed / BITS_PER_UNIT,
+		(sz + GET_MODE_SIZE (DImode) - 1)
+		& ~(HOST_WIDE_INT)(GET_MODE_SIZE (DImode) - 1));
 
   /* Declare the pseudos we have as ptx registers. */
   int maxregs = max_reg_num ();
@@ -3222,8 +3225,9 @@ nvptx_propagate (basic_block block, rtx_
   rtx pred = NULL_RTX;
   rtx_code_label *label = NULL;
 
-  gcc_assert (!(fs & (GET_MODE_SIZE (DImode) - 1)));
-  fs /= GET_MODE_SIZE (DImode);
+  /* The frame size might not be DImode compatible, but the frame
+     array's declaration will be. So it's ok to round up here. */
+  fs = (fs + GET_MODE_SIZE (DImode) - 1) / GET_MODE_SIZE (DImode);
   /* Detect single iteration loop. */
   if (fs == 1)
     fs = 0;
Index: libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c
===================================================================
--- libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c	(nonexistent)
+++ libgomp/testsuite/libgomp.oacc-c-c++-common/crash-1.c	(working copy)
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+/* ICEd in nvptx backend due to unexpected frame size. */
+#pragma acc routine worker
+void
+worker_matmul (int *c, int i)
+{
+  int j;
+
+#pragma acc loop
+  for (j = 0; j < 4; j++)
+    c[j] = j;
+}
+
+
+int
+main ()
+{
+  int c[4];
+
+#pragma acc parallel
+  {
+    worker_matmul (c, 0);
+  }
+
+  return 0;
+}