public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] New nvptx pattern and internal builtin
@ 2015-07-17 15:44 Bernd Schmidt
  2015-07-20 13:45 ` Nathan Sidwell
  0 siblings, 1 reply; 3+ messages in thread
From: Bernd Schmidt @ 2015-07-17 15:44 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 279 bytes --]

I've made this change at the request of Cesar who says it's needed for 
his reductions work. It makes a new instruction to represent shfl.down, 
a thread communication instruction, and some builtin functions for 
internal use to access it.

Committed on gomp-4_0-branch.


Bernd

[-- Attachment #2: shfldown2.diff --]
[-- Type: text/x-patch, Size: 7913 bytes --]

Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp	(revision 225936)
+++ gcc/ChangeLog.gomp	(working copy)
@@ -1,3 +1,17 @@
+2015-07-17  Bernd Schmidt  <bernds@codesourcery.com>
+
+	* config/nvptx/nvptx.c (enum nvptx_builtins,
+	struct builtin_description): New.
+	(nvptx_builtin_decls, bdesc_2arg): New static variables.
+	(def_builtin): New macro.
+	(nvptx_builtin_decl, nvptx_init_builtins, nvptx_expand_binop_builtin,
+	nvptx_expand_builtin): New static functions.
+	(TARGET_INIT_BUILTINS, TARGET_EXPAND_BUILTIN, TARGET_BUILTIN_DECL):
+	Define.
+	* config/nvptx/nvptx.md (UNSPECV_SHFL_DOWN): New constant.
+	(thread_shuffle_down<mode>): New pattern.
+	(thread_shuffle_downdi): New expander.
+
 2015-07-17  Julian Brown  <julian@codesourcery.com>
 
 	* gimplify.c (gimplify_scan_omp_clauses): Handle
Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 225936)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -3058,6 +3058,139 @@ nvptx_file_end (void)
     }
 }
 \f
+/* Codes for all the NVPTX builtins.  */
+enum nvptx_builtins
+{
+  NVPTX_BUILTIN_SHUFFLE_DOWN,
+  NVPTX_BUILTIN_SHUFFLE_DOWNF,
+  NVPTX_BUILTIN_SHUFFLE_DOWNLL,
+
+  NVPTX_BUILTIN_MAX
+};
+
+
+static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
+
+/* Return the NVPTX builtin for CODE.  */
+static tree
+nvptx_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
+{
+  if (code >= NVPTX_BUILTIN_MAX)
+    return error_mark_node;
+
+  return nvptx_builtin_decls[code];
+}
+
+#define def_builtin(NAME, TYPE, CODE)					\
+do {									\
+  tree bdecl;								\
+  bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
+				NULL, NULL_TREE);			\
+  nvptx_builtin_decls[CODE] = bdecl;					\
+} while (0)
+
+/* Set up all builtin functions for this target.  */
+static void
+nvptx_init_builtins (void)
+{ 
+  tree uint_ftype_uint_int
+    = build_function_type_list (unsigned_type_node, unsigned_type_node,
+				integer_type_node, NULL_TREE);
+  tree ull_ftype_ull_int
+    = build_function_type_list (long_long_unsigned_type_node,
+				long_long_unsigned_type_node,
+				integer_type_node, NULL_TREE);
+  tree float_ftype_float_int
+    = build_function_type_list (float_type_node, float_type_node,
+				integer_type_node, NULL_TREE);
+  def_builtin ("__builtin_nvptx_shuffle_down", uint_ftype_uint_int,
+	       NVPTX_BUILTIN_SHUFFLE_DOWN);
+  def_builtin ("__builtin_nvptx_shuffle_downf", float_ftype_float_int,
+	       NVPTX_BUILTIN_SHUFFLE_DOWNF);
+  def_builtin ("__builtin_nvptx_shuffle_downll", ull_ftype_ull_int,
+	       NVPTX_BUILTIN_SHUFFLE_DOWNLL);
+}
+
+/* Subroutine of nvptx_expand_builtin to take care of binop insns.  MACFLAG is -1
+   if this is a normal binary op, or one of the MACFLAG_xxx constants.  */
+
+static rtx
+nvptx_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
+{
+  rtx pat;
+  tree arg0 = CALL_EXPR_ARG (exp, 0);
+  tree arg1 = CALL_EXPR_ARG (exp, 1);
+  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+  machine_mode op0mode = GET_MODE (op0);
+  machine_mode op1mode = GET_MODE (op1);
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+  machine_mode mode0 = insn_data[icode].operand[1].mode;
+  machine_mode mode1 = insn_data[icode].operand[2].mode;
+  rtx ret = target;
+
+  if (! target
+      || GET_MODE (target) != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
+	      && (op1mode == mode1 || op1mode == VOIDmode));
+
+  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+    op0 = copy_to_mode_reg (mode0, op0);
+  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+    op1 = copy_to_mode_reg (mode1, op1);
+
+  pat = GEN_FCN (icode) (target, op0, op1);
+
+  if (! pat)
+    return 0;
+
+  emit_insn (pat);
+
+  return ret;
+}
+
+
+struct builtin_description
+{
+  const enum insn_code icode;
+  const char *const name;
+  const enum nvptx_builtins code;
+};
+
+static const struct builtin_description bdesc_2arg[] =
+{
+  { CODE_FOR_thread_shuffle_downsi, "__builtin_nvptx_shuffle_down", NVPTX_BUILTIN_SHUFFLE_DOWN },
+  { CODE_FOR_thread_shuffle_downsf, "__builtin_nvptx_shuffle_downf", NVPTX_BUILTIN_SHUFFLE_DOWNF },
+  { CODE_FOR_thread_shuffle_downdi, "__builtin_nvptx_shuffle_downll", NVPTX_BUILTIN_SHUFFLE_DOWNLL }
+};
+
+/* Expand an expression EXP that calls a built-in function,
+   with result going to TARGET if that's convenient
+   (and in mode MODE if that's convenient).
+   SUBTARGET may be used as the target for computing one of EXP's operands.
+   IGNORE is nonzero if the value is to be ignored.  */
+
+static rtx
+nvptx_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+		     rtx subtarget ATTRIBUTE_UNUSED,
+		     machine_mode mode ATTRIBUTE_UNUSED,
+		     int ignore ATTRIBUTE_UNUSED)
+{
+  size_t i;
+  const struct builtin_description *d;
+  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+
+  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+    if (d->code == fcode)
+      return nvptx_expand_binop_builtin (d->icode, exp, target);
+
+  gcc_unreachable ();
+}
+\f
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE nvptx_option_override
 
@@ -3145,6 +3278,13 @@ nvptx_file_end (void)
 #undef TARGET_CANNOT_COPY_INSN_P
 #define TARGET_CANNOT_COPY_INSN_P nvptx_cannot_copy_insn_p
 
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS nvptx_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN nvptx_expand_builtin
+#undef  TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL nvptx_builtin_decl
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-nvptx.h"
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 225936)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -70,6 +70,8 @@ (define_c_enum "unspecv" [
    UNSPECV_FORKED
    UNSPECV_JOINING
    UNSPECV_JOIN
+
+   UNSPECV_SHFL_DOWN
 ])
 
 (define_attr "subregs_ok" "false,true"
@@ -1416,6 +1418,39 @@ (define_insn "nvptx_broadcast<mode>"
   ""
   "%.\\tshfl.idx.b32\\t%0, %1, 0, 31;")
 
+(define_insn "thread_shuffle_down<mode>"
+  [(set (match_operand:BITS 0 "nvptx_register_operand" "")
+	(unspec_volatile:BITS [(match_operand:SI 1 "nvptx_register_operand" "")
+			       (match_operand:SI 2 "nvptx_nonmemory_operand" "")]
+			      UNSPECV_SHFL_DOWN))]
+  ""
+  "%.\\tshfl.down.b32\\t%0, %1, %2, 31;")
+
+(define_expand "thread_shuffle_downdi"
+  [(set (match_operand:DI 0 "nvptx_register_operand" "")
+	(unspec_volatile:DI [(match_operand:DI 1 "nvptx_register_operand" "")
+			     (match_operand:SI 2 "nvptx_nonmemory_operand" "")]
+			    UNSPECV_SHFL_DOWN))]
+  ""
+{
+  rtx t = gen_reg_rtx (DImode);
+  emit_insn (gen_lshrdi3 (t, operands[1], GEN_INT (32)));
+  rtx op0 = force_reg (SImode, gen_lowpart (SImode, t));
+  rtx op1 = force_reg (SImode, gen_lowpart (SImode, operands[1]));
+  rtx targ0 = gen_reg_rtx (SImode);
+  rtx targ1 = gen_reg_rtx (SImode);
+  emit_insn (gen_thread_shuffle_downsi (targ0, op0, operands[2]));
+  emit_insn (gen_thread_shuffle_downsi (targ1, op1, operands[2]));
+  rtx t2 = gen_reg_rtx (DImode);
+  rtx t3 = gen_reg_rtx (DImode);
+  emit_insn (gen_extendsidi2 (t2, targ0));
+  emit_insn (gen_extendsidi2 (t3, targ1));
+  rtx t4 = gen_reg_rtx (DImode);
+  emit_insn (gen_ashldi3 (t4, t2, GEN_INT (32)));
+  emit_insn (gen_iordi3 (operands[0], t3, t4));
+  DONE;
+})
+
 ;; extract parts of a 64 bit object into 2 32-bit ints
 (define_insn "unpack<mode>si2"
   [(set (match_operand:SI 0 "nvptx_register_operand" "")

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [gomp4] New nvptx pattern and internal builtin
  2015-07-17 15:44 [gomp4] New nvptx pattern and internal builtin Bernd Schmidt
@ 2015-07-20 13:45 ` Nathan Sidwell
  2015-07-20 14:10   ` Bernd Schmidt
  0 siblings, 1 reply; 3+ messages in thread
From: Nathan Sidwell @ 2015-07-20 13:45 UTC (permalink / raw)
  To: Bernd Schmidt, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 732 bytes --]

On 07/17/15 11:37, Bernd Schmidt wrote:
> I've made this change at the request of Cesar who says it's needed for his
> reductions work. It makes a new instruction to represent shfl.down, a thread
> communication instruction, and some builtin functions for internal use to access
> it.

I was looking at adding another target builtin, and found this code rather 
convoluted.  It seemed to have been cloned from somewhere more complicated -- 
for instance, nvptx_expand_binop_builtin's comment discusses a MACFLAG argument, 
which is nowhere to be seen.

I ended up reimplementing using a single array describing the builtins and 
allowing direct indexing using the builtin number, rather than iteration when 
expanding.

ok?

nathan

[-- Attachment #2: gomp4-blt.patch --]
[-- Type: text/x-patch, Size: 6832 bytes --]

2015-07-20  Nathan Sidwell  <nathan@codesourcery.com>

	* config/nvptx/nvptx.c (nvptx_builtins): Delete enum.
	(nvptx_types): New enum.
	(builtin_description): Add type and num_args fields.
	(builtins): New array describing builtins.
	(NVPTX_BUILTIN_MAX): Define.
	(def_builtin): Delete.
	(nvptx_init_builtins): Reimplement using builtins array.
	(nvptx_expand_binop_builtin): Delete.
	(bdesc_2arg): Delete.
	(nvptx_expand_builtin): Reimplement using builtins array.

Index: config/nvptx/nvptx.c
===================================================================
--- config/nvptx/nvptx.c	(revision 225992)
+++ config/nvptx/nvptx.c	(working copy)
@@ -3058,16 +3058,34 @@ nvptx_file_end (void)
     }
 }
 \f
-/* Codes for all the NVPTX builtins.  */
-enum nvptx_builtins
+enum nvptx_types
+  {
+    NT_UINT_UINT_INT,
+    NT_ULL_ULL_INT,
+    NT_FLT_FLT_INT,
+
+    NT_MAX
+  };
+
+struct builtin_description
 {
-  NVPTX_BUILTIN_SHUFFLE_DOWN,
-  NVPTX_BUILTIN_SHUFFLE_DOWNF,
-  NVPTX_BUILTIN_SHUFFLE_DOWNLL,
+  const char *name;
+  enum insn_code icode;
+  unsigned short type;
+  unsigned short num_args;
+};
 
-  NVPTX_BUILTIN_MAX
+static const struct builtin_description builtins[] =
+{
+  {"__builtin_nvptx_shuffle_down", CODE_FOR_thread_shuffle_downsi,
+   NT_UINT_UINT_INT, 2},
+  {"__builtin_nvptx_shuffle_downf", CODE_FOR_thread_shuffle_downsf,
+   NT_FLT_FLT_INT, 2},
+  { "__builtin_nvptx_shuffle_downll", CODE_FOR_thread_shuffle_downdi,
+    NT_ULL_ULL_INT, 2},
 };
 
+#define NVPTX_BUILTIN_MAX (sizeof (builtins) / sizeof (builtins[0]))
 
 static GTY(()) tree nvptx_builtin_decls[NVPTX_BUILTIN_MAX];
 
@@ -3081,92 +3099,30 @@ nvptx_builtin_decl (unsigned code, bool
   return nvptx_builtin_decls[code];
 }
 
-#define def_builtin(NAME, TYPE, CODE)					\
-do {									\
-  tree bdecl;								\
-  bdecl = add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
-				NULL, NULL_TREE);			\
-  nvptx_builtin_decls[CODE] = bdecl;					\
-} while (0)
-
 /* Set up all builtin functions for this target.  */
 static void
 nvptx_init_builtins (void)
-{ 
-  tree uint_ftype_uint_int
+{
+  tree types[NT_MAX];
+  unsigned ix;
+
+  types[NT_UINT_UINT_INT]
     = build_function_type_list (unsigned_type_node, unsigned_type_node,
 				integer_type_node, NULL_TREE);
-  tree ull_ftype_ull_int
+  types[NT_ULL_ULL_INT]
     = build_function_type_list (long_long_unsigned_type_node,
 				long_long_unsigned_type_node,
 				integer_type_node, NULL_TREE);
-  tree float_ftype_float_int
+  types[NT_FLT_FLT_INT]
     = build_function_type_list (float_type_node, float_type_node,
 				integer_type_node, NULL_TREE);
-  def_builtin ("__builtin_nvptx_shuffle_down", uint_ftype_uint_int,
-	       NVPTX_BUILTIN_SHUFFLE_DOWN);
-  def_builtin ("__builtin_nvptx_shuffle_downf", float_ftype_float_int,
-	       NVPTX_BUILTIN_SHUFFLE_DOWNF);
-  def_builtin ("__builtin_nvptx_shuffle_downll", ull_ftype_ull_int,
-	       NVPTX_BUILTIN_SHUFFLE_DOWNLL);
-}
-
-/* Subroutine of nvptx_expand_builtin to take care of binop insns.  MACFLAG is -1
-   if this is a normal binary op, or one of the MACFLAG_xxx constants.  */
-
-static rtx
-nvptx_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
-{
-  rtx pat;
-  tree arg0 = CALL_EXPR_ARG (exp, 0);
-  tree arg1 = CALL_EXPR_ARG (exp, 1);
-  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
-  machine_mode op0mode = GET_MODE (op0);
-  machine_mode op1mode = GET_MODE (op1);
-  machine_mode tmode = insn_data[icode].operand[0].mode;
-  machine_mode mode0 = insn_data[icode].operand[1].mode;
-  machine_mode mode1 = insn_data[icode].operand[2].mode;
-  rtx ret = target;
-
-  if (! target
-      || GET_MODE (target) != tmode
-      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
-    target = gen_reg_rtx (tmode);
-
-  gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
-	      && (op1mode == mode1 || op1mode == VOIDmode));
 
-  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
-    op0 = copy_to_mode_reg (mode0, op0);
-  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
-    op1 = copy_to_mode_reg (mode1, op1);
-
-  pat = GEN_FCN (icode) (target, op0, op1);
-
-  if (! pat)
-    return 0;
-
-  emit_insn (pat);
-
-  return ret;
+  for (ix = 0; ix != NVPTX_BUILTIN_MAX; ix++)
+    nvptx_builtin_decls[ix]
+      =  add_builtin_function (builtins[ix].name, types[builtins[ix].type],
+			       ix, BUILT_IN_MD, NULL, NULL_TREE);
 }
 
-
-struct builtin_description
-{
-  const enum insn_code icode;
-  const char *const name;
-  const enum nvptx_builtins code;
-};
-
-static const struct builtin_description bdesc_2arg[] =
-{
-  { CODE_FOR_thread_shuffle_downsi, "__builtin_nvptx_shuffle_down", NVPTX_BUILTIN_SHUFFLE_DOWN },
-  { CODE_FOR_thread_shuffle_downsf, "__builtin_nvptx_shuffle_downf", NVPTX_BUILTIN_SHUFFLE_DOWNF },
-  { CODE_FOR_thread_shuffle_downdi, "__builtin_nvptx_shuffle_downll", NVPTX_BUILTIN_SHUFFLE_DOWNLL }
-};
-
 /* Expand an expression EXP that calls a built-in function,
    with result going to TARGET if that's convenient
    (and in mode MODE if that's convenient).
@@ -3174,21 +3130,41 @@ static const struct builtin_description
    IGNORE is nonzero if the value is to be ignored.  */
 
 static rtx
-nvptx_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
+nvptx_expand_builtin (tree exp, rtx target,
 		     rtx subtarget ATTRIBUTE_UNUSED,
-		     machine_mode mode ATTRIBUTE_UNUSED,
-		     int ignore ATTRIBUTE_UNUSED)
+		     machine_mode mode,
+		     int ignore)
 {
-  size_t i;
-  const struct builtin_description *d;
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
-  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+  const struct builtin_description *d = &builtins[DECL_FUNCTION_CODE (fndecl)];
+  unsigned icode = d->icode;
+  rtx operands[2]; /* maximum operands */
+  unsigned ix;
+  machine_mode tmode = insn_data[icode].operand[0].mode;
+
+  if (ignore)
+    return target;
+  
+  if (! target
+      || mode != tmode
+      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+    target = gen_reg_rtx (tmode);
+
+  for (ix = d->num_args; ix--;)
+    {
+      machine_mode m = insn_data[icode].operand[ix + 1].mode;
+      rtx op = expand_expr (CALL_EXPR_ARG (exp, ix),
+			    NULL_RTX, VOIDmode, EXPAND_NORMAL);
+      if (! (*insn_data[icode].operand[ix + 1].predicate) (op, m))
+	op = copy_to_mode_reg (m, op);
+      operands[ix] = op;
+    }
 
-  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
-    if (d->code == fcode)
-      return nvptx_expand_binop_builtin (d->icode, exp, target);
+  rtx pat = GEN_FCN (icode) (target, operands[0], operands[1]);
+  if (pat)
+    emit_insn (pat);
 
-  gcc_unreachable ();
+  return target;
 }
 \f
 #undef TARGET_OPTION_OVERRIDE

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [gomp4] New nvptx pattern and internal builtin
  2015-07-20 13:45 ` Nathan Sidwell
@ 2015-07-20 14:10   ` Bernd Schmidt
  0 siblings, 0 replies; 3+ messages in thread
From: Bernd Schmidt @ 2015-07-20 14:10 UTC (permalink / raw)
  To: Nathan Sidwell, gcc-patches

On 07/20/2015 03:19 PM, Nathan Sidwell wrote:

> I was looking at adding another target builtin, and found this code
> rather convoluted.  It seemed to have been cloned from somewhere more
> complicated -- for instance, nvptx_expand_binop_builtin's comment
> discusses a MACFLAG argument, which is nowhere to be seen.

Okay, I admit to tuning out comments for code that I know, and I didn't 
notice that one. As for being convoluted - this is pretty much the 
standard structure for the machine specific builtins which is used in a 
lot of ports.

> I ended up reimplementing using a single array describing the builtins
> and allowing direct indexing using the builtin number, rather than
> iteration when expanding.

If you really want to, that's fine, but note the point about consistency 
with other ports.


Bernd

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2015-07-20 13:38 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-17 15:44 [gomp4] New nvptx pattern and internal builtin Bernd Schmidt
2015-07-20 13:45 ` Nathan Sidwell
2015-07-20 14:10   ` Bernd Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).