2015-08-03 Nathan Sidwell gcc/ * internal-fn.def (GOACC_DIM_SIZE, GOACC_DFIM_POS): New. * internal-fn.c (expand_GOACC_DIM_SIZE, expand_GOACC_DIM_POS): New. * config/nvptx.md (UNSPEC_NID, UNSPEC_ID): Rename to ... (UNSPEC_DIM_SIZE, UNSPEC_DIM_POS): ... here. (oacc_nid, oacc_id): Rename to ... (oacc_dim_size, oacc_dim_pos): ... here. Adjust. * config/nvptx.c (nvptx_single): Adjust. * omp-low.c (expand_oacc_get_num_threads, expand_oacc_get_thread_num, oacc_init_count_vars): Use new internal builtins. * omp-builtins.def (BUILT_IN_GOACC_ID, BUILT_IN_GOACC_NID): Delete. * builtins.c (expand_oacc_id): Delete. (expand_builtin, is_simpe_biltin): Adjust. libgomp/ * testuite/libgomp.oacc-c-c++-common/gang-static-2.c: Use asm insert. Index: gcc/internal-fn.def =================================================================== --- gcc/internal-fn.def (revision 226515) +++ gcc/internal-fn.def (working copy) @@ -66,3 +66,5 @@ DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | E DEF_INTERNAL_FN (GOACC_DATA_END_WITH_ARG, ECF_NOTHROW, ".r") DEF_INTERNAL_FN (GOACC_FORK, ECF_NOTHROW | ECF_LEAF, ".") DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW | ECF_LEAF, ".") +DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".") +DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_NOTHROW | ECF_LEAF, ".") Index: gcc/omp-builtins.def =================================================================== --- gcc/omp-builtins.def (revision 226515) +++ gcc/omp-builtins.def (working copy) @@ -58,10 +58,6 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC DEF_GOACC_BUILTIN (BUILT_IN_GOACC_WAIT, "GOACC_wait", BT_FN_VOID_INT_INT_VAR, ATTR_NOTHROW_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_ID, "GOACC_id", - BT_FN_UINT_UINT, ATTR_NOTHROW_LEAF_LIST) -DEF_GOACC_BUILTIN (BUILT_IN_GOACC_NID, "GOACC_nid", - BT_FN_UINT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_GET_GANGLOCAL_PTR, "GOACC_get_ganglocal_ptr", BT_FN_PTR, ATTR_NOTHROW_LEAF_LIST) DEF_GOACC_BUILTIN (BUILT_IN_GOACC_DEVICEPTR, "GOACC_deviceptr", Index: gcc/config/nvptx/nvptx.md =================================================================== --- gcc/config/nvptx/nvptx.md (revision 226515) +++ gcc/config/nvptx/nvptx.md (working copy) @@ -49,7 +49,7 @@ UNSPEC_ALLOCA - UNSPEC_NID + UNSPEC_DIM_SIZE UNSPEC_SHARED_DATA @@ -65,7 +65,7 @@ UNSPECV_CAS UNSPECV_XCHG UNSPECV_BARSYNC - UNSPECV_ID + UNSPECV_DIM_POS UNSPECV_FORK UNSPECV_FORKED @@ -1335,9 +1335,10 @@ DONE; }) -(define_insn "oacc_nid" +(define_insn "oacc_dim_size" [(set (match_operand:SI 0 "nvptx_register_operand" "") - (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] UNSPEC_NID))] + (unspec:SI [(match_operand:SI 1 "const_int_operand" "")] + UNSPEC_DIM_SIZE))] "" { static const char *const asms[] = @@ -1349,10 +1350,10 @@ return asms[INTVAL (operands[1])]; }) -(define_insn "oacc_id" +(define_insn "oacc_dim_pos" [(set (match_operand:SI 0 "nvptx_register_operand" "") (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")] - UNSPECV_ID))] + UNSPECV_DIM_POS))] "" { static const char *const asms[] = Index: gcc/config/nvptx/nvptx.c =================================================================== --- gcc/config/nvptx/nvptx.c (revision 226515) +++ gcc/config/nvptx/nvptx.c (working copy) @@ -2771,7 +2771,7 @@ nvptx_single (unsigned mask, basic_block rtx pred = gen_reg_rtx (BImode); rtx_code_label *label = gen_label_rtx (); - emit_insn_before (gen_oacc_id (id, GEN_INT (mode)), head); + emit_insn_before (gen_oacc_dim_pos (id, GEN_INT (mode)), head); rtx cond = gen_rtx_SET (pred, gen_rtx_NE (BImode, id, const0_rtx)); emit_insn_before (cond, head); rtx br; Index: gcc/internal-fn.c =================================================================== --- gcc/internal-fn.c (revision 226515) +++ gcc/internal-fn.c (working copy) @@ -1984,6 +1984,42 @@ expand_GOACC_JOIN (gcall *stmt ATTRIBUTE #endif } +static void +expand_GOACC_DIM_SIZE (gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx val = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX, + VOIDmode, EXPAND_NORMAL); +#ifdef HAVE_oacc_dim_size + emit_insn (gen_oacc_dim_size (target, val)); +#else + emit_move_insn (target, const1_rtx); +#endif +} + +static void +expand_GOACC_DIM_POS (gcall *stmt) +{ + tree lhs = gimple_call_lhs (stmt); + + if (!lhs) + return; + + rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE); + rtx val = expand_expr (gimple_call_arg (stmt, 0), NULL_RTX, + VOIDmode, EXPAND_NORMAL); +#ifdef HAVE_oacc_dim_pos + emit_insn (gen_oacc_dim_pos (target, val)); +#else + emit_move_insn (target, const0_rtx); +#endif +} + /* Routines to expand each internal function, indexed by function number. Each routine has the prototype: Index: gcc/builtins.c =================================================================== --- gcc/builtins.c (revision 226515) +++ gcc/builtins.c (working copy) @@ -5921,59 +5921,6 @@ expand_builtin_acc_on_device (tree exp, return target; } -/* Expand a thread-id/thread-count builtin for OpenACC. */ - -static rtx -expand_oacc_id (enum built_in_function fcode, tree exp, rtx target) -{ - tree arg0 = CALL_EXPR_ARG (exp, 0); - rtx result = const0_rtx; - rtx arg; - - arg = expand_normal (arg0); - - if (GET_CODE (arg) != CONST_INT || UINTVAL (arg) >= GOMP_DIM_MAX) - { - error ("argument to %D must be constant in range 0 to %d", - get_callee_fndecl (exp), GOMP_DIM_MAX - 1); - return result; - } - - enum insn_code icode = CODE_FOR_nothing; - switch (fcode) - { - case BUILT_IN_GOACC_NID: -#ifdef HAVE_oacc_nid - icode = CODE_FOR_oacc_nid; -#endif - result = const1_rtx; - break; - case BUILT_IN_GOACC_ID: -#ifdef HAVE_oacc_id - icode = CODE_FOR_oacc_id; -#endif - break; - default: - gcc_unreachable (); - break; - } - - if (icode != CODE_FOR_nothing) - { - machine_mode mode = insn_data[icode].operand[0].mode; - rtx tmp = target; - if (!REG_P (tmp) || GET_MODE (tmp) != mode) - tmp = gen_reg_rtx (mode); - rtx insn = GEN_FCN (icode) (tmp, arg); - if (insn != NULL_RTX) - { - emit_insn (insn); - return tmp; - } - } - return result; -} - static rtx expand_oacc_ganglocal_ptr (rtx target ATTRIBUTE_UNUSED) { @@ -7135,10 +7082,6 @@ expand_builtin (tree exp, rtx target, rt return target; break; - case BUILT_IN_GOACC_ID: - case BUILT_IN_GOACC_NID: - return expand_oacc_id (fcode, exp, target); - case BUILT_IN_GOACC_GET_GANGLOCAL_PTR: target = expand_oacc_ganglocal_ptr (target); if (target) @@ -12497,8 +12440,6 @@ is_simple_builtin (tree decl) case BUILT_IN_EH_FILTER: case BUILT_IN_EH_POINTER: case BUILT_IN_EH_COPY_VALUES: - /* Just a special register read. */ - case BUILT_IN_GOACC_NID: return true; default: Index: gcc/omp-low.c =================================================================== --- gcc/omp-low.c (revision 226515) +++ gcc/omp-low.c (working copy) @@ -4676,7 +4676,6 @@ static tree expand_oacc_get_num_threads (gimple_seq *seq, int gwv_bits) { tree res = build_int_cst (unsigned_type_node, 1); - tree decl = builtin_decl_explicit (BUILT_IN_GOACC_NID); unsigned ix; for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++) @@ -4684,7 +4683,7 @@ expand_oacc_get_num_threads (gimple_seq { tree arg = build_int_cst (unsigned_type_node, ix); tree count = create_tmp_var (unsigned_type_node); - gimple call = gimple_build_call (decl, 1, arg); + gimple call = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg); gimple_call_set_lhs (call, count); gimple_seq_add_stmt (seq, call); @@ -4702,8 +4701,6 @@ static tree expand_oacc_get_thread_num (gimple_seq *seq, int gwv_bits) { tree res = NULL_TREE; - tree id_decl = builtin_decl_explicit (BUILT_IN_GOACC_ID); - tree nid_decl = builtin_decl_explicit (BUILT_IN_GOACC_NID); unsigned ix; /* Start at gang level, and examine relevant dimension indices. */ @@ -4717,7 +4714,8 @@ expand_oacc_get_thread_num (gimple_seq * /* We had an outer index, so scale that by the size of this dimension. */ tree n = create_tmp_var (unsigned_type_node); - gimple call = gimple_build_call (nid_decl, 1, arg); + gimple call + = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg); gimple_call_set_lhs (call, n); gimple_seq_add_stmt (seq, call); @@ -4726,7 +4724,7 @@ expand_oacc_get_thread_num (gimple_seq * /* Determine index in this dimension. */ tree id = create_tmp_var (unsigned_type_node); - gimple call = gimple_build_call (id_decl, 1, arg); + gimple call = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg); gimple_call_set_lhs (call, id); gimple_seq_add_stmt (seq, call); @@ -11671,8 +11669,6 @@ lower_omp_taskreg (gimple_stmt_iterator static void oacc_init_count_vars (omp_context *ctx, tree clauses ATTRIBUTE_UNUSED) { - tree getid = builtin_decl_explicit (BUILT_IN_GOACC_ID); - tree getnid = builtin_decl_explicit (BUILT_IN_GOACC_NID); tree worker_var, worker_count; if (ctx->gwv_this & GOMP_DIM_MASK (GOMP_DIM_WORKER)) @@ -11682,11 +11678,11 @@ oacc_init_count_vars (omp_context *ctx, worker_var = create_tmp_var (unsigned_type_node, ".worker"); worker_count = create_tmp_var (unsigned_type_node, ".workercount"); - gimple call1 = gimple_build_call (getid, 1, arg); + gimple call1 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg); gimple_call_set_lhs (call1, worker_var); gimple_seq_add_stmt (&ctx->ganglocal_init, call1); - gimple call2 = gimple_build_call (getnid, 1, arg); + gimple call2 = gimple_build_call_internal (IFN_GOACC_DIM_SIZE, 1, arg); gimple_call_set_lhs (call2, worker_count); gimple_seq_add_stmt (&ctx->ganglocal_init, call2); } Index: libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c =================================================================== --- libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c (revision 226515) +++ libgomp/testsuite/libgomp.oacc-c-c++-common/gang-static-2.c (working copy) @@ -1,9 +1,17 @@ /* { dg-do run { target openacc_nvidia_accel_selected } } */ +/* { dg-additional-options "-O2" } */ #include +#include #define N 100 +#define GANG_ID(I) \ + (acc_on_device (acc_device_nvidia) \ + ? ({unsigned __r; \ + __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (__r)); \ + __r; }) : (I)) + int test_static(int *a, int num_gangs, int sarg) { @@ -35,38 +43,38 @@ main () #pragma acc parallel loop gang (static:*) num_gangs (10) for (i = 0; i < 100; i++) - a[i] = __builtin_GOACC_id (0); + a[i] = GANG_ID (i); test_nonstatic (a, 10); #pragma acc parallel loop gang (static:1) num_gangs (10) for (i = 0; i < 100; i++) - a[i] = __builtin_GOACC_id (0); + a[i] = GANG_ID (i); test_static (a, 10, 1); #pragma acc parallel loop gang (static:2) num_gangs (10) for (i = 0; i < 100; i++) - a[i] = __builtin_GOACC_id (0); + a[i] = GANG_ID (i); test_static (a, 10, 2); #pragma acc parallel loop gang (static:5) num_gangs (10) for (i = 0; i < 100; i++) - a[i] = __builtin_GOACC_id (0); + a[i] = GANG_ID (i); test_static (a, 10, 5); #pragma acc parallel loop gang (static:20) num_gangs (10) for (i = 0; i < 100; i++) - a[i] = __builtin_GOACC_id (0); + a[i] = GANG_ID (i); test_static (a, 10, 20); /* Non-static gang. */ #pragma acc parallel loop gang num_gangs (10) for (i = 0; i < 100; i++) - a[i] = __builtin_GOACC_id (0); + a[i] = GANG_ID (i); test_nonstatic (a, 10);