2015-08-03 Nathan Sidwell * config/nvptx/nvptx.c (nvptx_reorg): Check get_oacc_fn_attrib for launch dimensions and only do parallel processing when present. Check dimensions to determine neutering requirements. (nvptx_record_offload_symbol): Launch dimension attribute must be present on offloaded functions. Index: gcc/config/nvptx/nvptx.c =================================================================== --- gcc/config/nvptx/nvptx.c (revision 226485) +++ gcc/config/nvptx/nvptx.c (working copy) @@ -2980,13 +2980,42 @@ nvptx_reorg (void) if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0) regno_reg_rtx[i] = const0_rtx; - parallel *pars = nvptx_discover_pars (&bb_insn_map); - - nvptx_process_pars (pars); - nvptx_neuter_pars (pars, (GOMP_DIM_MASK (GOMP_DIM_VECTOR) - | GOMP_DIM_MASK (GOMP_DIM_WORKER)), 0); - - delete pars; + /* Determine launch dimensions of the function. If it is not an + offloaded function (i.e. this is a regular compiler), the + function has no neutering. */ + tree attr = get_oacc_fn_attrib (current_function_decl); + if (attr) + { + unsigned mask = 0; + tree dims = TREE_VALUE (attr); + unsigned ix; + + for (ix = 0; ix != GOMP_DIM_MAX; ix++) + { + unsigned HOST_WIDE_INT dim = 0; + + if (dims) + { + tree cst = TREE_VALUE (dims); + + dim = TREE_INT_CST_LOW (cst); + dims = TREE_CHAIN (dims); + } + if (dim != 1) + mask |= GOMP_DIM_MASK (ix); + } + /* If there is worker neutering, there must be vector + neutering. Otherwise the hardware will fail. This really + should be dealt with earlier because it indicates faulty + logic in determining launch dimensions. */ + if (mask & GOMP_DIM_MASK (GOMP_DIM_WORKER)) + mask |= GOMP_DIM_MASK (GOMP_DIM_VECTOR); + + parallel *pars = nvptx_discover_pars (&bb_insn_map); + nvptx_process_pars (pars); + nvptx_neuter_pars (pars, mask, 0); + delete pars; + } nvptx_reorg_subreg (); @@ -3073,32 +3102,25 @@ nvptx_record_offload_symbol (tree decl) case FUNCTION_DECL: { tree attr = get_oacc_fn_attrib (decl); - tree dims = NULL_TREE; + tree dims = TREE_VALUE (attr); unsigned ix; - if (attr) - dims = TREE_VALUE (attr); fprintf (asm_out_file, "//:FUNC_MAP \"%s\"", IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))); - for (ix = 0; ix != GOMP_DIM_MAX; ix++) + for (ix = 0; ix != GOMP_DIM_MAX; ix++, dims = TREE_CHAIN (dims)) { - unsigned HOST_WIDE_INT dim = 0; - if (dims) - { - tree cst = TREE_VALUE (dims); - - /* When device_type support is added an ealier pass - should have massaged the attribute to be - ptx-specific. */ - gcc_assert (TREE_CODE (cst) == INTEGER_CST); - - dim = TREE_INT_CST_LOW (cst); - dims = TREE_CHAIN (dims); - } + tree cst = TREE_VALUE (dims); + + /* When device_type support is added an earlier pass + should have massaged the attribute to be + ptx-specific. */ + gcc_assert (TREE_CODE (cst) == INTEGER_CST); + + unsigned HOST_WIDE_INT dim = TREE_INT_CST_LOW (cst); fprintf (asm_out_file, ", " HOST_WIDE_INT_PRINT_HEX, dim); } - + fprintf (asm_out_file, "\n"); } break;