diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 49cc681..afc3a890 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -2879,6 +2879,7 @@ nvptx_mem_maybe_shared_p (const_rtx x) t -- print a type opcode suffix, promoting QImode to 32 bits T -- print a type size in bits u -- print a type opcode suffix without promotions. + p -- print a '!' for constant 0. x -- print a destination operand that may also be a bit bucket. */ static void @@ -3012,6 +3013,11 @@ nvptx_print_operand (FILE *file, rtx x, int code) fprintf (file, "@!"); goto common; + case 'p': + if (INTVAL (x) == 0) + fprintf (file, "!"); + break; + case 'c': mode = GET_MODE (XEXP (x, 0)); switch (x_code) @@ -6151,9 +6157,90 @@ enum nvptx_builtins NVPTX_BUILTIN_CMP_SWAPLL, NVPTX_BUILTIN_MEMBAR_GL, NVPTX_BUILTIN_MEMBAR_CTA, + NVPTX_BUILTIN_BAR_RED_AND, + NVPTX_BUILTIN_BAR_RED_OR, + NVPTX_BUILTIN_BAR_RED_POPC, NVPTX_BUILTIN_MAX }; +/* Expander for 'bar.red' instruction builtins. */ + +static rtx +nvptx_expand_bar_red (tree exp, rtx target, + machine_mode ARG_UNUSED (m), int ARG_UNUSED (ignore)) +{ + int code = DECL_MD_FUNCTION_CODE (TREE_OPERAND (CALL_EXPR_FN (exp), 0)); + machine_mode mode = TYPE_MODE (TREE_TYPE (exp)); + + if (!target) + target = gen_reg_rtx (mode); + + rtx pred, dst; + rtx bar = expand_expr (CALL_EXPR_ARG (exp, 0), + NULL_RTX, SImode, EXPAND_NORMAL); + rtx nthr = expand_expr (CALL_EXPR_ARG (exp, 1), + NULL_RTX, SImode, EXPAND_NORMAL); + rtx cpl = expand_expr (CALL_EXPR_ARG (exp, 2), + NULL_RTX, SImode, EXPAND_NORMAL); + rtx redop = expand_expr (CALL_EXPR_ARG (exp, 3), + NULL_RTX, SImode, EXPAND_NORMAL); + if (CONST_INT_P (bar)) + { + if (INTVAL (bar) < 0 || INTVAL (bar) > 15) + { + error_at (EXPR_LOCATION (exp), + "barrier value must be within [0,15]"); + return const0_rtx; + } + } + else if (!REG_P (bar)) + bar = copy_to_mode_reg (SImode, bar); + + if (!CONST_INT_P (nthr) && !REG_P (nthr)) + nthr = copy_to_mode_reg (SImode, nthr); + + if (!CONST_INT_P (cpl)) + { + error_at (EXPR_LOCATION (exp), + "complement argument must be constant"); + return const0_rtx; + } + + pred = gen_reg_rtx (BImode); + if (!REG_P (redop)) + redop = copy_to_mode_reg (SImode, redop); + emit_insn (gen_rtx_SET (pred, gen_rtx_NE (BImode, redop, GEN_INT (0)))); + redop = pred; + + rtx pat; + switch (code) + { + case NVPTX_BUILTIN_BAR_RED_AND: + dst = gen_reg_rtx (BImode); + pat = gen_nvptx_barred_and (dst, bar, nthr, cpl, redop); + break; + case NVPTX_BUILTIN_BAR_RED_OR: + dst = gen_reg_rtx (BImode); + pat = gen_nvptx_barred_or (dst, bar, nthr, cpl, redop); + break; + case NVPTX_BUILTIN_BAR_RED_POPC: + dst = gen_reg_rtx (SImode); + pat = gen_nvptx_barred_popc (dst, bar, nthr, cpl, redop); + break; + default: + gcc_unreachable (); + } + emit_insn (pat); + if (GET_MODE (dst) == BImode) + { + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (tmp, gen_rtx_NE (mode, dst, GEN_INT (0)))); + dst = tmp; + } + emit_move_insn (target, dst); + return target; +} + static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX]; /* Return the NVPTX builtin for CODE. */ @@ -6194,6 +6281,13 @@ nvptx_init_builtins (void) DEF (MEMBAR_GL, "membar_gl", (VOID, VOID, NULL_TREE)); DEF (MEMBAR_CTA, "membar_cta", (VOID, VOID, NULL_TREE)); + DEF (BAR_RED_AND, "bar_red_and", + (UINT, UINT, UINT, UINT, UINT, NULL_TREE)); + DEF (BAR_RED_OR, "bar_red_or", + (UINT, UINT, UINT, UINT, UINT, NULL_TREE)); + DEF (BAR_RED_POPC, "bar_red_popc", + (UINT, UINT, UINT, UINT, UINT, NULL_TREE)); + #undef DEF #undef ST #undef UINT @@ -6236,6 +6330,11 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), emit_insn (gen_nvptx_membar_cta ()); return NULL_RTX; + case NVPTX_BUILTIN_BAR_RED_AND: + case NVPTX_BUILTIN_BAR_RED_OR: + case NVPTX_BUILTIN_BAR_RED_POPC: + return nvptx_expand_bar_red (exp, target, mode, ignore); + default: gcc_unreachable (); } } diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 8ed6850..740c4de 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -58,6 +58,9 @@ UNSPECV_CAS_LOCAL UNSPECV_XCHG UNSPECV_ST + UNSPECV_BARRED_AND + UNSPECV_BARRED_OR + UNSPECV_BARRED_POPC UNSPECV_BARSYNC UNSPECV_WARPSYNC UNSPECV_UNIFORM_WARP_CHECK @@ -2274,6 +2277,35 @@ "TARGET_PTX_6_0" "%.\\tbar.warp.sync\\t0xffffffff;") +(define_int_iterator BARRED + [UNSPECV_BARRED_AND + UNSPECV_BARRED_OR + UNSPECV_BARRED_POPC]) +(define_int_attr barred_op + [(UNSPECV_BARRED_AND "and") + (UNSPECV_BARRED_OR "or") + (UNSPECV_BARRED_POPC "popc")]) +(define_int_attr barred_mode + [(UNSPECV_BARRED_AND "BI") + (UNSPECV_BARRED_OR "BI") + (UNSPECV_BARRED_POPC "SI")]) +(define_int_attr barred_ptxtype + [(UNSPECV_BARRED_AND "pred") + (UNSPECV_BARRED_OR "pred") + (UNSPECV_BARRED_POPC "u32")]) + +(define_insn "nvptx_barred_" + [(set (match_operand: 0 "nvptx_register_operand" "=R") + (unspec_volatile + [(match_operand:SI 1 "nvptx_nonmemory_operand" "Ri") + (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") + (match_operand:SI 3 "const_int_operand" "i") + (match_operand:BI 4 "nvptx_register_operand" "R")] + BARRED))] + "" + "\\tbar.red.. \\t%0, %1, %2, %p3%4;";" + [(set_attr "predicable" "no")]) + (define_insn "nvptx_uniform_warp_check" [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)] ""