* [gomp4] New nvptx pattern and internal builtin
@ 2015-07-17 15:44 Bernd Schmidt
2015-07-20 13:45 ` Nathan Sidwell
0 siblings, 1 reply; 3+ messages in thread
From: Bernd Schmidt @ 2015-07-17 15:44 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 279 bytes --]
I've made this change at the request of Cesar who says it's needed for
his reductions work. It makes a new instruction to represent shfl.down,
a thread communication instruction, and some builtin functions for
internal use to access it.
Committed on gomp-4_0-branch.
Bernd
[-- Attachment #2: shfldown2.diff --]
[-- Type: text/x-patch, Size: 7913 bytes --]
Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp (revision 225936)
+++ gcc/ChangeLog.gomp (working copy)
@@ -1,3 +1,17 @@
+2015-07-17 Bernd Schmidt <bernds@codesourcery.com>
+
+ * config/nvptx/nvptx.c (enum nvptx_builtins,
+ struct builtin_description): New.
+ (nvptx_builtin_decls, bdesc_2arg): New static variables.
+ (def_builtin): New macro.
+ (nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_binop_builtin,
+ nvptx_expand_builtin): New static functions.
+ (TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN, TARGET_BUILTIN_DECL):
+ Define.
+ * config/nvptx/nvptx.md (UNSPECV_SHFL_DOWN): New constant.
+ (thread_shuffle_down<mode>): New pattern.
+ (thread_shuffle_downdi): New expander.
+
2015-07-17 Julian Brown <julian@codesourcery.com>
* gimplify.c (gimplify_scan_omp_clauses): Handle
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c (revision 225936)
+++ gcc/config/nvptx/nvptx.c (working copy)
@@ -3058,6 +3058,139 @@ nvptx_file_end (void)
}
}
\f
+/* Codes for all the NVPTX builtins. */
+enum nvptx_builtins
+{
+ NVPTX_BUILTIN_SHUFFLE_DOWN,
+ NVPTX_BUILTIN_SHUFFLE_DOWNF,
+ NVPTX_BUILTIN_SHUFFLE_DOWNLL,
+
+ NVPTX_BUILTIN_MAX
+};
+
+
+static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
+
+/* Return the NVPTX builtin for CODE. */
+static tree
+nvptx_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+ if (code >= NVPTX_BUILTIN_MAX)
+ return error_mark_node;
+
+ return nvptx_builtin_decls[code];
+}
+
+#define def_builtin(NAME, TYPE, CODE) \
+do { \
+ tree bdecl; \
+ bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
+ NULL, NULL_TREE); \
+ nvptx_builtin_decls[CODE] = bdecl; \
+} while (0)
+
+/* Set up all builtin functions for this target. */
+static void
+nvptx_init_builtins (void)
+{
+ tree uint_ftype_uint_int
+ = build_function_type_list (unsigned_type_node, unsigned_type_node,
+ integer_type_node, NULL_TREE);
+ tree ull_ftype_ull_int
+ = build_function_type_list (long_long_unsigned_type_node,
+ long_long_unsigned_type_node,
+ integer_type_node, NULL_TREE);
+ tree float_ftype_float_int
+ = build_function_type_list (float_type_node, float_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_nvptx_shuffle_down", uint_ftype_uint_int,
+ NVPTX_BUILTIN_SHUFFLE_DOWN);
+ def_builtin ("__builtin_nvptx_shuffle_downf", float_ftype_float_int,
+ NVPTX_BUILTIN_SHUFFLE_DOWNF);
+ def_builtin ("__builtin_nvptx_shuffle_downll", ull_ftype_ull_int,
+ NVPTX_BUILTIN_SHUFFLE_DOWNLL);
+}
+
+/* Subroutine of nvptx_expand_builtin to take care of binop insns. MACFLAG is -1
+ if this is a normal binary op, or one of the MACFLAG_xxx constants. */
+
+static rtx
+nvptx_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+ rtx pat;
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ machine_mode op0mode = GET_MODE (op0);
+ machine_mode op1mode = GET_MODE (op1);
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+ machine_mode mode0 = insn_data[icode].operand[1].mode;
+ machine_mode mode1 = insn_data[icode].operand[2].mode;
+ rtx ret = target;
+
+ if (! target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
+ && (op1mode == mode1 || op1mode == VOIDmode));
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+
+ return ret;
+}
+
+
+struct builtin_description
+{
+ const enum insn_code icode;
+ const char *const name;
+ const enum nvptx_builtins code;
+};
+
+static const struct builtin_description bdesc_2arg[] =
+{
+ { CODE_FOR_thread_shuffle_downsi, "__builtin_nvptx_shuffle_down", NVPTX_BUILTIN_SHUFFLE_DOWN },
+ { CODE_FOR_thread_shuffle_downsf, "__builtin_nvptx_shuffle_downf", NVPTX_BUILTIN_SHUFFLE_DOWNF },
+ { CODE_FOR_thread_shuffle_downdi, "__builtin_nvptx_shuffle_downll", NVPTX_BUILTIN_SHUFFLE_DOWNLL }
+};
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+nvptx_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ size_t i;
+ const struct builtin_description *d;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+ for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return nvptx_expand_binop_builtin (d->icode, exp, target);
+
+ gcc_unreachable ();
+}
+\f
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE nvptx_option_override
@@ -3145,6 +3278,13 @@ nvptx_file_end (void)
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS nvptx_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN nvptx_expand_builtin
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL nvptx_builtin_decl
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-nvptx.h"
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md (revision 225936)
+++ gcc/config/nvptx/nvptx.md (working copy)
@@ -70,6 +70,8 @@ (define_c_enum "unspecv" [
UNSPECV_FORKED
UNSPECV_JOINING
UNSPECV_JOIN
+
+ UNSPECV_SHFL_DOWN
])
(define_attr "subregs_ok" "false,true"
@@ -1416,6 +1418,39 @@ (define_insn "nvptx_broadcast<mode>"
""
"%.\\tshfl.idx.b32\\t%0, %1, 0, 31;")
+;; Emit shfl.down.b32, a thread communication instruction. Operand 0 is
+;; the result, operand 1 the value supplied by this thread and operand 2
+;; the SImode second source operand of shfl.down.
+;; Operand 1 carries the same BITS mode as operand 0, so that the SF
+;; variant takes an SFmode input; with a fixed SImode input the float
+;; builtin's argument could never match the pattern's operand mode.
+(define_insn "thread_shuffle_down<mode>"
+ [(set (match_operand:BITS 0 "nvptx_register_operand" "")
+ (unspec_volatile:BITS [(match_operand:BITS 1 "nvptx_register_operand" "")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "")]
+ UNSPECV_SHFL_DOWN))]
+ ""
+ "%.\\tshfl.down.b32\\t%0, %1, %2, 31;")
+
+;; 64-bit shuffle-down built from two 32-bit shuffles: operand 1 is split
+;; into its high and low SImode halves, each half is shuffled with
+;; thread_shuffle_downsi using operand 2, and the two results are
+;; recombined into operand 0.
+(define_expand "thread_shuffle_downdi"
+ [(set (match_operand:DI 0 "nvptx_register_operand" "")
+ (unspec_volatile:DI [(match_operand:DI 1 "nvptx_register_operand" "")
+ (match_operand:SI 2 "nvptx_nonmemory_operand" "")]
+ UNSPECV_SHFL_DOWN))]
+ ""
+{
+ /* High half of the input: shift it down into the low 32 bits. */
+ rtx t = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (t, operands[1], GEN_INT (32)));
+ rtx op0 = force_reg (SImode, gen_lowpart (SImode, t));
+ /* Low half of the input. */
+ rtx op1 = force_reg (SImode, gen_lowpart (SImode, operands[1]));
+ rtx targ0 = gen_reg_rtx (SImode);
+ rtx targ1 = gen_reg_rtx (SImode);
+ emit_insn (gen_thread_shuffle_downsi (targ0, op0, operands[2]));
+ emit_insn (gen_thread_shuffle_downsi (targ1, op1, operands[2]));
+ rtx t2 = gen_reg_rtx (DImode);
+ rtx t3 = gen_reg_rtx (DImode);
+ /* The high result is shifted left by 32 below, which discards whatever
+ the extension placed in the upper bits. */
+ emit_insn (gen_extendsidi2 (t2, targ0));
+ /* The low result is ORed in unshifted, so it must be zero-extended: a
+ sign extension would set all of the upper 32 bits whenever bit 31 of
+ the low result is set, overwriting the high half. */
+ emit_insn (gen_zero_extendsidi2 (t3, targ1));
+ rtx t4 = gen_reg_rtx (DImode);
+ emit_insn (gen_ashldi3 (t4, t2, GEN_INT (32)));
+ emit_insn (gen_iordi3 (operands[0], t3, t4));
+ DONE;
+})
+
;; extract parts of a 64 bit object into 2 32-bit ints
(define_insn "unpack<mode>si2"
[(set (match_operand:SI 0 "nvptx_register_operand" "")
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [gomp4] New nvptx pattern and internal builtin
2015-07-17 15:44 [gomp4] New nvptx pattern and internal builtin Bernd Schmidt
@ 2015-07-20 13:45 ` Nathan Sidwell
2015-07-20 14:10 ` Bernd Schmidt
0 siblings, 1 reply; 3+ messages in thread
From: Nathan Sidwell @ 2015-07-20 13:45 UTC (permalink / raw)
To: Bernd Schmidt, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 732 bytes --]
On 07/17/15 11:37, Bernd Schmidt wrote:
> I've made this change at the request of Cesar who says it's needed for his
> reductions work. It makes a new instruction to represent shfl.down, a thread
> communication instruction, and some builtin functions for internal use to access
> it.
I was looking at adding another target builtin, and found this code rather
convoluted. It seemed to have been cloned from somewhere more complicated --
for instance, nvptx_expand_binop_builtin's comment discusses a MACFLAG argument,
which is nowhere to be seen.
I ended up reimplementing using a single array describing the builtins and
allowing direct indexing using the builtin number, rather than iteration when
expanding.
ok?
nathan
[-- Attachment #2: gomp4-blt.patch --]
[-- Type: text/x-patch, Size: 6832 bytes --]
2015-07-20 Nathan Sidwell <nathan@codesourcery.com>
* config/nvptx/nvptx.c (nvptx_builtins): Delete enum.
(nvptx_types): New enum.
(builtin_description): Add type and num_args fields.
(builtins): New array describing builtins.
(NVPTX_BUILTIN_MAX): Define.
(def_builtin): Delete.
(nvptx_init_builtins): Reimplement using builtins array.
(nvptx_expand_binop_builtin): Delete.
(bdesc_2arg): Delete.
(nvptx_expand_builtin): Reimplement using builtins array.
Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c (revision 225992)
+++ config/nvptx/nvptx.c (working copy)
@@ -3058,16 +3058,34 @@ nvptx_file_end (void)
}
}
\f
-/* Codes for all the NVPTX builtins. */
-enum nvptx_builtins
+enum nvptx_types
+ {
+ NT_UINT_UINT_INT,
+ NT_ULL_ULL_INT,
+ NT_FLT_FLT_INT,
+
+ NT_MAX
+ };
+
+struct builtin_description
{
- NVPTX_BUILTIN_SHUFFLE_DOWN,
- NVPTX_BUILTIN_SHUFFLE_DOWNF,
- NVPTX_BUILTIN_SHUFFLE_DOWNLL,
+ const char *name;
+ enum insn_code icode;
+ unsigned short type;
+ unsigned short num_args;
+};
- NVPTX_BUILTIN_MAX
+static const struct builtin_description builtins[] =
+{
+ {"__builtin_nvptx_shuffle_down", CODE_FOR_thread_shuffle_downsi,
+ NT_UINT_UINT_INT, 2},
+ {"__builtin_nvptx_shuffle_downf", CODE_FOR_thread_shuffle_downsf,
+ NT_FLT_FLT_INT, 2},
+ { "__builtin_nvptx_shuffle_downll", CODE_FOR_thread_shuffle_downdi,
+ NT_ULL_ULL_INT, 2},
};
+#define NVPTX_BUILTIN_MAX (sizeof (builtins) / sizeof (builtins[0]))
static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
@@ -3081,92 +3099,30 @@ nvptx_builtin_decl (unsigned code, bool
return nvptx_builtin_decls[code];
}
-#define def_builtin(NAME, TYPE, CODE) \
-do { \
- tree bdecl; \
- bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
- NULL, NULL_TREE); \
- nvptx_builtin_decls[CODE] = bdecl; \
-} while (0)
-
/* Set up all builtin functions for this target. */
static void
nvptx_init_builtins (void)
-{
- tree uint_ftype_uint_int
+{
+ tree types[NT_MAX];
+ unsigned ix;
+
+ types[NT_UINT_UINT_INT]
= build_function_type_list (unsigned_type_node, unsigned_type_node,
integer_type_node, NULL_TREE);
- tree ull_ftype_ull_int
+ types[NT_ULL_ULL_INT]
= build_function_type_list (long_long_unsigned_type_node,
long_long_unsigned_type_node,
integer_type_node, NULL_TREE);
- tree float_ftype_float_int
+ types[NT_FLT_FLT_INT]
= build_function_type_list (float_type_node, float_type_node,
integer_type_node, NULL_TREE);
- def_builtin ("__builtin_nvptx_shuffle_down", uint_ftype_uint_int,
- NVPTX_BUILTIN_SHUFFLE_DOWN);
- def_builtin ("__builtin_nvptx_shuffle_downf", float_ftype_float_int,
- NVPTX_BUILTIN_SHUFFLE_DOWNF);
- def_builtin ("__builtin_nvptx_shuffle_downll", ull_ftype_ull_int,
- NVPTX_BUILTIN_SHUFFLE_DOWNLL);
-}
-
-/* Subroutine of nvptx_expand_builtin to take care of binop insns. MACFLAG is -1
- if this is a normal binary op, or one of the MACFLAG_xxx constants. */
-
-static rtx
-nvptx_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
-{
- rtx pat;
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree arg1 = CALL_EXPR_ARG (exp, 1);
- rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
- rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
- machine_mode op0mode = GET_MODE (op0);
- machine_mode op1mode = GET_MODE (op1);
- machine_mode tmode = insn_data[icode].operand[0].mode;
- machine_mode mode0 = insn_data[icode].operand[1].mode;
- machine_mode mode1 = insn_data[icode].operand[2].mode;
- rtx ret = target;
-
- if (! target
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
-
- gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
- && (op1mode == mode1 || op1mode == VOIDmode));
- if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
- op0 = copy_to_mode_reg (mode0, op0);
- if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
- op1 = copy_to_mode_reg (mode1, op1);
-
- pat = GEN_FCN (icode) (target, op0, op1);
-
- if (! pat)
- return 0;
-
- emit_insn (pat);
-
- return ret;
+ for (ix = 0; ix != NVPTX_BUILTIN_MAX; ix++)
+ nvptx_builtin_decls[ix]
+ = add_builtin_function (builtins[ix].name, types[builtins[ix].type],
+ ix, BUILT_IN_MD, NULL, NULL_TREE);
}
-
-struct builtin_description
-{
- const enum insn_code icode;
- const char *const name;
- const enum nvptx_builtins code;
-};
-
-static const struct builtin_description bdesc_2arg[] =
-{
- { CODE_FOR_thread_shuffle_downsi, "__builtin_nvptx_shuffle_down", NVPTX_BUILTIN_SHUFFLE_DOWN },
- { CODE_FOR_thread_shuffle_downsf, "__builtin_nvptx_shuffle_downf", NVPTX_BUILTIN_SHUFFLE_DOWNF },
- { CODE_FOR_thread_shuffle_downdi, "__builtin_nvptx_shuffle_downll", NVPTX_BUILTIN_SHUFFLE_DOWNLL }
-};
-
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
@@ -3174,21 +3130,41 @@ static const struct builtin_description
IGNORE is nonzero if the value is to be ignored. */
static rtx
-nvptx_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+nvptx_expand_builtin (tree exp, rtx target,
rtx subtarget ATTRIBUTE_UNUSED,
- machine_mode mode ATTRIBUTE_UNUSED,
- int ignore ATTRIBUTE_UNUSED)
+ machine_mode mode,
+ int ignore)
{
- size_t i;
- const struct builtin_description *d;
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d = &builtins[DECL_FUNCTION_CODE (fndecl)];
+ unsigned icode = d->icode;
+ rtx operands[2]; /* Maximum operands: GEN_FCN below always passes exactly two. */
+ unsigned ix;
+ machine_mode tmode = insn_data[icode].operand[0].mode;
+
+ if (ignore) /* NOTE(review): returns before any argument is expanded; confirm no builtin argument can have side effects. */
+ return target;
+
+ if (! target
+ || mode != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ for (ix = d->num_args; ix--;) /* Expand the arguments last to first. */
+ {
+ machine_mode m = insn_data[icode].operand[ix + 1].mode;
+ rtx op = expand_expr (CALL_EXPR_ARG (exp, ix),
+ NULL_RTX, VOIDmode, EXPAND_NORMAL);
+ if (! (*insn_data[icode].operand[ix + 1].predicate) (op, m))
+ op = copy_to_mode_reg (m, op);
+ operands[ix] = op;
+ }
- for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return nvptx_expand_binop_builtin (d->icode, exp, target);
+ rtx pat = GEN_FCN (icode) (target, operands[0], operands[1]);
+ if (pat)
+ emit_insn (pat);
- gcc_unreachable ();
+ return target;
}
\f
#undef TARGET_OPTION_OVERRIDE
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [gomp4] New nvptx pattern and internal builtin
2015-07-20 13:45 ` Nathan Sidwell
@ 2015-07-20 14:10 ` Bernd Schmidt
0 siblings, 0 replies; 3+ messages in thread
From: Bernd Schmidt @ 2015-07-20 14:10 UTC (permalink / raw)
To: Nathan Sidwell, gcc-patches
On 07/20/2015 03:19 PM, Nathan Sidwell wrote:
> I was looking at adding another target builtin, and found this code
> rather convoluted. It seemed to have been cloned from somewhere more
> complicated -- for instance, nvptx_expand_binop_builtin's comment
> discusses a MACFLAG argument, which is nowhere to be seen.
Okay, I admit to tuning out comments for code that I know, and I didn't
notice that one. As for being convoluted - this is pretty much the
standard structure for the machine specific builtins which is used in a
lot of ports.
> I ended up reimplementing using a single array describing the builtins
> and allowing direct indexing using the builtin number, rather than
> iteration when expanding.
If you really want to, that's fine, but note the point about consistency
with other ports.
Bernd
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-07-20 13:38 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-17 15:44 [gomp4] New nvptx pattern and internal builtin Bernd Schmidt
2015-07-20 13:45 ` Nathan Sidwell
2015-07-20 14:10 ` Bernd Schmidt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).