2015-08-17 Nathan Sidwell * target.def (lock_unlock): New GOACC hook. * targhooks.h (default_goacc_lock_unlock): Declare. * doc/tm.texi.in (TARGET_GOACC_LOCK_UNLOCK): Add. * doc/tm.texi: Rebuilt. * internal-fn.def (GOACC_LOCK, GOACC_UNLOCK): New. * internal-fn.c (expand_GOACC_LOCK, expand_GOACC_UNLOCK): New. * omp-low.c (execute_oacc_transform): Add lock/unlock handling. (default_goacc_lock_unlock): New. * config/nvptx/nvptx-protos.h (nvptx_expand_oacc_lock_unlock): Declare. * config/nvptx/nvptx.md (UNSPECV_UNLOCK): Delete. (oacc_lock, oacc_unlock): New expanders. (nvptx_spinlock, nvptx_spinunlock): Use UNSPECV_LOCK. * config/nvptx/nvptx.c (nvptx_expand_oacc_lock_unlock): New. (nvptx_expand_lock_unlock): Delete. (nvptx_expand_lock, nvptx_expand_unlock): Delete. (nvptx_expand_work_red_addr): Fixup address generation. (enum nvptx_types): Delete NT_VOID_UINT. (builtins): Delete nvptx_lock and nvptx_unlock. (nvptx_init_builtins): Adjust. (nvptx_xform_lock_unlock): New. (TARGET_GOACC_LOCK_UNLOCK): Override. 
Index: gcc/config/nvptx/nvptx-protos.h =================================================================== --- gcc/config/nvptx/nvptx-protos.h (revision 226951) +++ gcc/config/nvptx/nvptx-protos.h (working copy) @@ -34,6 +34,7 @@ extern const char *nvptx_section_for_dec #ifdef RTX_CODE extern void nvptx_expand_oacc_fork (rtx); extern void nvptx_expand_oacc_join (rtx); +extern void nvptx_expand_oacc_lock_unlock (rtx, bool); extern void nvptx_expand_call (rtx, rtx); extern rtx nvptx_expand_compare (rtx); extern const char *nvptx_ptx_type_from_mode (machine_mode, bool); Index: gcc/config/nvptx/nvptx.md =================================================================== --- gcc/config/nvptx/nvptx.md (revision 226951) +++ gcc/config/nvptx/nvptx.md (working copy) @@ -61,7 +61,6 @@ (define_c_enum "unspecv" [ UNSPECV_LOCK - UNSPECV_UNLOCK UNSPECV_CAS UNSPECV_XCHG UNSPECV_BARSYNC @@ -1366,6 +1365,26 @@ return asms[INTVAL (operands[1])]; }) +(define_expand "oacc_lock" + [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "") + (match_operand:SI 1 "const_int_operand" "")] + UNSPECV_LOCK)] + "" +{ + nvptx_expand_oacc_lock_unlock (operands[0], true); + DONE; +}) + +(define_expand "oacc_unlock" + [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "") + (match_operand:SI 1 "const_int_operand" "")] + UNSPECV_LOCK)] + "" +{ + nvptx_expand_oacc_lock_unlock (operands[0], false); + DONE; +}) + (define_insn "nvptx_fork" [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_FORK)] @@ -1576,7 +1595,7 @@ [(parallel [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m") (match_operand:SI 1 "const_int_operand" "i")] - UNSPECV_UNLOCK) + UNSPECV_LOCK) (match_operand:SI 2 "register_operand" "=R") (match_operand:BI 3 "register_operand" "=R") (label_ref (match_operand 4 "" ""))])] @@ -1586,7 +1605,7 @@ (define_insn "nvptx_spinunlock" [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m") (match_operand:SI 1 "const_int_operand" "i")] - 
UNSPECV_UNLOCK) + UNSPECV_LOCK) (match_operand:SI 2 "register_operand" "=R")] "" "atom%R1.exch.b32 %2,%0,0;") Index: gcc/config/nvptx/nvptx.c =================================================================== --- gcc/config/nvptx/nvptx.c (revision 226951) +++ gcc/config/nvptx/nvptx.c (working copy) @@ -1164,6 +1164,39 @@ nvptx_expand_oacc_join (rtx mode) emit_insn (gen_nvptx_joining (mode)); } +/* Expander for reduction locking and unlocking. We expect SRC to be + gang or worker level. */ + +void +nvptx_expand_oacc_lock_unlock (rtx src, bool lock) +{ + unsigned HOST_WIDE_INT kind; + rtx pat; + + kind = INTVAL (src) == GOMP_DIM_GANG ? LOCK_GLOBAL : LOCK_SHARED; + lock_used[kind] = true; + + rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]); + rtx space = GEN_INT (lock_space[kind]); + rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind])); + rtx tmp = gen_reg_rtx (SImode); + + if (!lock) + emit_insn (barrier); + if (lock) + { + rtx_code_label *label = gen_label_rtx (); + + LABEL_NUSES (label)++; + pat = gen_nvptx_spinlock (mem, space, tmp, gen_reg_rtx (BImode), label); + } + else + pat = gen_nvptx_spinunlock (mem, space, tmp); + emit_insn (pat); + if (lock) + emit_insn (barrier); +} + /* Generate instruction(s) to unpack a 64 bit object into 2 32 bit objects. */ @@ -3306,62 +3339,6 @@ nvptx_expand_shuffle_down (tree exp, rtx return target; } -/* Expander for locking and unlocking. */ -static rtx -nvptx_expand_lock_unlock (tree exp, bool lock) -{ - rtx src = expand_expr (CALL_EXPR_ARG (exp, 0), - NULL_RTX, SImode, EXPAND_NORMAL); - unsigned HOST_WIDE_INT kind; - rtx pat; - - kind = GET_CODE (src) == CONST_INT ? 
INTVAL (src) : LOCK_MAX; - if (kind >= LOCK_MAX) - error ("builtin %D requires constant argument less than %u", - get_callee_fndecl (exp), LOCK_MAX); - lock_used[kind] = true; - - rtx mem = gen_rtx_MEM (SImode, lock_syms[kind]); - rtx space = GEN_INT (lock_space[kind]); - rtx barrier = gen_nvptx_membar (GEN_INT (lock_level[kind])); - - if (!lock) - emit_insn (barrier); - if (lock) - { - rtx_code_label *label = gen_label_rtx (); - - LABEL_NUSES (label)++; - pat = gen_nvptx_spinlock (mem, space, - gen_reg_rtx (SImode), gen_reg_rtx (BImode), - label); - } - else - pat = gen_nvptx_spinunlock (mem, space, gen_reg_rtx (SImode)); - emit_insn (pat); - if (lock) - emit_insn (barrier); - return const0_rtx; -} - -/* Lock expander. */ - -static rtx -nvptx_expand_lock (tree exp, rtx ARG_UNUSED (target), - machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore)) -{ - return nvptx_expand_lock_unlock (exp, true); -} - -/* Unlock expander. */ - -static rtx -nvptx_expand_unlock (tree exp, rtx ARG_UNUSED (target), - machine_mode ARG_UNUSED (mode), int ARG_UNUSED (ignore)) -{ - return nvptx_expand_lock_unlock (exp, false); -} - /* Worker reduction address expander. */ static rtx nvptx_expand_work_red_addr (tree exp, rtx target, @@ -3413,12 +3390,16 @@ nvptx_expand_work_red_addr (tree exp, rt /* Return offset into worker reduction array. 
*/ unsigned offset = loop.vars[ix].second; - rtx addr = gen_reg_rtx (Pmode); - emit_move_insn (addr, - gen_rtx_PLUS (Pmode, worker_red_sym, GEN_INT (offset))); + emit_insn (gen_rtx_SET (target, worker_red_sym)); + + if (offset) + emit_insn (gen_rtx_SET (target, + gen_rtx_PLUS (Pmode, target, GEN_INT (offset)))); + emit_insn (gen_rtx_SET (target, - gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), + gen_rtx_UNSPEC (Pmode, gen_rtvec (1, target), UNSPEC_FROM_SHARED))); + return target; } @@ -3428,7 +3409,6 @@ enum nvptx_types NT_ULL_ULL_INT, NT_FLT_FLT_INT, NT_DBL_DBL_INT, - NT_VOID_UINT, NT_UINTPTR_UINT_UINT, NT_ULLPTR_UINT_UINT, NT_FLTPTR_UINT_UINT, @@ -3446,8 +3426,6 @@ static const struct builtin_description nvptx_expand_shuffle_down}, {"__builtin_nvptx_shuffle_downd", NT_DBL_DBL_INT, nvptx_expand_shuffle_down}, - {"__builtin_nvptx_lock", NT_VOID_UINT, nvptx_expand_lock}, - {"__builtin_nvptx_unlock", NT_VOID_UINT, nvptx_expand_unlock}, {"__builtin_nvptx_work_red_addr", NT_UINTPTR_UINT_UINT, nvptx_expand_work_red_addr}, {"__builtin_nvptx_work_red_addrll", NT_ULLPTR_UINT_UINT, @@ -3492,9 +3470,6 @@ nvptx_init_builtins (void) types[NT_DBL_DBL_INT] = build_function_type_list (double_type_node, double_type_node, integer_type_node, NULL_TREE); - types[NT_VOID_UINT] - = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); - types[NT_UINTPTR_UINT_UINT] = build_function_type_list (build_pointer_type (unsigned_type_node), unsigned_type_node, unsigned_type_node, @@ -3628,6 +3603,20 @@ nvptx_xform_fork_join (gimple_stmt_itera return false; } + +/* Check lock & unlock. We only need the gang- & worker-level ones. 
+ */ + +static bool +nvptx_xform_lock_unlock (gimple_stmt_iterator *ARG_UNUSED (gsi), + gimple stmt, + const int *ARG_UNUSED (dims), + bool ARG_UNUSED (is_fork)) +{ + tree arg = gimple_call_arg (stmt, 0); + + return TREE_INT_CST_LOW (arg) > GOMP_DIM_WORKER; +} #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override @@ -3732,6 +3721,9 @@ nvptx_xform_fork_join (gimple_stmt_itera #undef TARGET_GOACC_FORK_JOIN #define TARGET_GOACC_FORK_JOIN nvptx_xform_fork_join +#undef TARGET_GOACC_LOCK_UNLOCK +#define TARGET_GOACC_LOCK_UNLOCK nvptx_xform_lock_unlock + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-nvptx.h" Index: gcc/targhooks.h =================================================================== --- gcc/targhooks.h (revision 226951) +++ gcc/targhooks.h (working copy) @@ -111,6 +111,8 @@ extern bool default_goacc_validate_dims extern unsigned default_goacc_dim_limit (unsigned); extern bool default_goacc_fork_join (gimple_stmt_iterator *, gimple, const int [], bool); +extern bool default_goacc_lock_unlock (gimple_stmt_iterator *, gimple, + const int [], bool); /* These are here, and not in hooks.[ch], because not all users of hooks.h include tm.h, and thus we don't have CUMULATIVE_ARGS. */ Index: gcc/target.def =================================================================== --- gcc/target.def (revision 226951) +++ gcc/target.def (working copy) @@ -1670,6 +1670,15 @@ default hook returns true, if there is n bool, (gimple_stmt_iterator *, gimple, const int[], bool), default_goacc_fork_join) +DEFHOOK +(lock_unlock, +"This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function\n\ +calls to target-specific gimple. It is executed during the oacc_xform\n\ +pass. It should return true, if the functions should be deleted. 
The\n\ +default hook returns true, if there is no RTL expanders for them.", +bool, (gimple_stmt_iterator *, gimple, const int[], bool), +default_goacc_lock_unlock) + HOOK_VECTOR_END (goacc) /* Functions relating to vectorization. */ Index: gcc/internal-fn.def =================================================================== --- gcc/internal-fn.def (revision 226951) +++ gcc/internal-fn.def (working copy) @@ -83,3 +83,9 @@ DEF_INTERNAL_FN (GOACC_JOIN, ECF_NOTHROW single INTEGER_CST argument. */ DEF_INTERNAL_FN (GOACC_DIM_SIZE, ECF_CONST | ECF_NOTHROW | ECF_LEAF, ".") DEF_INTERNAL_FN (GOACC_DIM_POS, ECF_PURE | ECF_NOTHROW | ECF_LEAF, ".") + +/* LOCK and UNLOCK operate a mutex used for reductions. The first + argument is the compute dimension of the reduction and the second + argument is a loop identifier. */ +DEF_INTERNAL_FN (GOACC_LOCK, ECF_NOTHROW | ECF_LEAF, "..") +DEF_INTERNAL_FN (GOACC_UNLOCK, ECF_NOTHROW | ECF_LEAF, "..") Index: gcc/omp-low.c =================================================================== --- gcc/omp-low.c (revision 226951) +++ gcc/omp-low.c (working copy) @@ -14743,19 +14743,24 @@ execute_oacc_transform () { default: break; + case IFN_GOACC_DIM_POS: case IFN_GOACC_DIM_SIZE: - oacc_xform_dim (&gsi, stmt, dims, false); + oacc_xform_dim (&gsi, stmt, dims, + ifn_code == IFN_GOACC_DIM_POS); break; - case IFN_GOACC_DIM_POS: - oacc_xform_dim (&gsi, stmt, dims, true); - break; + case IFN_GOACC_LOCK: + case IFN_GOACC_UNLOCK: + if (targetm.goacc.lock_unlock + (&gsi, stmt, dims, ifn_code == IFN_GOACC_LOCK)) + goto remove; case IFN_GOACC_FORK: case IFN_GOACC_JOIN: if (targetm.goacc.fork_join (&gsi, stmt, dims, ifn_code == IFN_GOACC_FORK)) { + remove: replace_uses_by (gimple_vdef (stmt), gimple_vuse (stmt)); gsi_remove (&gsi, true); @@ -14814,7 +14819,6 @@ default_goacc_fork_join (gimple_stmt_ite gimple ARG_UNUSED (stmt), const int *ARG_UNUSED (dims), bool is_fork) { - /* If there is no expander, we can delete the functions. 
*/ if (is_fork) { #ifndef HAVE_oacc_fork @@ -14827,6 +14831,31 @@ default_goacc_fork_join (gimple_stmt_ite return true; #endif } + + return false; +} + +/* Default lock/unlock early expander. Delete the function calls if + there is no RTL expander. */ + +bool +default_goacc_lock_unlock (gimple_stmt_iterator *ARG_UNUSED (gsi), + gimple ARG_UNUSED (stmt), + const int*ARG_UNUSED (dims), + bool is_lock) +{ + if (is_lock) + { +#ifndef HAVE_oacc_lock + return true; +#endif + } + else + { +#ifndef HAVE_oacc_unlock + return true; +#endif + } return false; } Index: gcc/internal-fn.c =================================================================== --- gcc/internal-fn.c (revision 226951) +++ gcc/internal-fn.c (working copy) @@ -2025,6 +2025,32 @@ expand_GOACC_DIM_POS (gcall *ARG_UNUSED #endif } +static void +expand_GOACC_LOCK (gcall *ARG_UNUSED (stmt)) +{ +#ifdef HAVE_oacc_lock + rtx dim = expand_normal (gimple_call_arg (stmt, 0)); + rtx id = expand_normal (gimple_call_arg (stmt, 1)); + + emit_insn (gen_oacc_lock (dim, id)); +#else + gcc_unreachable (); +#endif +} + +static void +expand_GOACC_UNLOCK (gcall *ARG_UNUSED (stmt)) +{ +#ifdef HAVE_oacc_unlock + rtx dim = expand_normal (gimple_call_arg (stmt, 0)); + rtx id = expand_normal (gimple_call_arg (stmt, 1)); + + emit_insn (gen_oacc_unlock (dim, id)); +#else + gcc_unreachable (); +#endif +} + /* Routines to expand each internal function, indexed by function number. Each routine has the prototype: Index: gcc/doc/tm.texi =================================================================== --- gcc/doc/tm.texi (revision 226951) +++ gcc/doc/tm.texi (working copy) @@ -5760,6 +5760,13 @@ pass. It should return true, if the fun default hook returns true, if there is no RTL expanders for them. 
@end deftypefn +@deftypefn {Target Hook} bool TARGET_GOACC_LOCK_UNLOCK (gimple_stmt_iterator *@var{}, @var{gimple}, const @var{int[]}, @var{bool}) +This hook should convert IFN_GOACC_LOCK and IFN_GOACC_UNLOCK function +calls to target-specific gimple. It is executed during the oacc_xform +pass. It should return true, if the functions should be deleted. The +default hook returns true, if there is no RTL expanders for them. +@end deftypefn + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses Index: gcc/doc/tm.texi.in =================================================================== --- gcc/doc/tm.texi.in (revision 226951) +++ gcc/doc/tm.texi.in (working copy) @@ -4251,6 +4251,8 @@ address; but often a machine-dependent @hook TARGET_GOACC_FORK_JOIN +@hook TARGET_GOACC_LOCK_UNLOCK + @node Anchored Addresses @section Anchored Addresses @cindex anchored addresses