public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
                   ` (3 preceding siblings ...)
  2017-03-22 15:46 ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Alexander Monakov
@ 2017-03-22 15:46 ` Alexander Monakov
  2017-03-23 10:47   ` Jakub Jelinek
  2017-03-31 10:22 ` [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Thomas Schwinge
  5 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-22 15:46 UTC (permalink / raw)
  To: gcc-patches

This patch implements privatization for SIMT during inlining.  We need to
discover if the call being inlined belongs to a SIMT region (by looking at
simduid of the containing loop), and if so, treat them similar to OpenMP-SIMD
privatization: add the "omp simt private" attribute and mention them among
arguments of GOMP_SIMT_ENTER.

OpenMP-SIMD privatization also adds a clobber at the end of the region; I'm
not sure if it's required here: in the example I've looked at, inlined code
already contained a clobber.

	* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
        * tree-inline.c (expand_call_inline): Handle SIMT privatization.
        (copy_decl_for_dup_finish): Ditto.
---
 gcc/tree-inline.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 gcc/tree-inline.h |  4 ++++
 2 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 6b6d489..56817e4 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4385,6 +4385,11 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   gcall *call_stmt;
   unsigned int i;
   unsigned int prop_mask, src_properties;
+  struct function *dst_cfun;
+  tree simduid;
+  use_operand_p use;
+  gimple *simtenter_stmt = NULL;
+  hash_set<tree> *simtvars_st = NULL;
 
   /* The gimplifier uses input_location in too many places, such as
      internal_get_tmp_var ().  */
@@ -4588,15 +4593,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   id->src_cfun = DECL_STRUCT_FUNCTION (fn);
   id->call_stmt = call_stmt;
 
+  /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
+     variables to be added to IFN_GOMP_SIMT_ENTER argument list.  */
+  dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
+  if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
+      && (simduid = bb->loop_father->simduid) != NULL_TREE
+      && (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
+      && single_imm_use (simduid, &use, &simtenter_stmt)
+      && is_gimple_call (simtenter_stmt)
+      && gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
+    {
+      simtvars_st = id->dst_simt_vars;
+      id->dst_simt_vars = new hash_set<tree>;
+    }
+
   /* If the src function contains an IFN_VA_ARG, then so will the dst
      function after inlining.  Likewise for IFN_GOMP_USE_SIMT.  */
   prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
   src_properties = id->src_cfun->curr_properties & prop_mask;
   if (src_properties != prop_mask)
-    {
-      struct function *dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
-      dst_cfun->curr_properties &= src_properties | ~prop_mask;
-    }
+    dst_cfun->curr_properties &= src_properties | ~prop_mask;
 
   gcc_assert (!id->src_cfun->after_inlining);
 
@@ -4730,6 +4746,25 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   if (cfun->gimple_df)
     pt_solution_reset (&cfun->gimple_df->escaped);
 
+  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
+  if (id->dst_simt_vars)
+    {
+      size_t nargs = gimple_call_num_args (simtenter_stmt);
+      hash_set<tree> *vars = id->dst_simt_vars;
+      auto_vec<tree> newargs (nargs + vars->elements ());
+      for (size_t i = 0; i < nargs; i++)
+	newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
+      for (hash_set<tree>::iterator i = vars->begin (); i != vars->end (); ++i)
+	newargs.quick_push (build1 (ADDR_EXPR,
+				    build_pointer_type (TREE_TYPE (*i)), *i));
+      gcall *g = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
+      gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
+      gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
+      gsi_replace (&gsi, g, false);
+      delete id->dst_simt_vars;
+      id->dst_simt_vars = simtvars_st;
+    }
+
   /* Clean up.  */
   if (id->debug_map)
     {
@@ -5453,9 +5488,19 @@ copy_decl_for_dup_finish (copy_body_data *id, tree decl, tree copy)
        function.  */
     ;
   else
-    /* Ordinary automatic local variables are now in the scope of the
-       new function.  */
-    DECL_CONTEXT (copy) = id->dst_fn;
+    {
+      /* Ordinary automatic local variables are now in the scope of the
+	 new function.  */
+      DECL_CONTEXT (copy) = id->dst_fn;
+      if (VAR_P (copy) && id->dst_simt_vars && !is_gimple_reg (copy))
+	{
+	  if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (copy)))
+	    DECL_ATTRIBUTES (copy)
+	      = tree_cons (get_identifier ("omp simt private"), NULL,
+			   DECL_ATTRIBUTES (copy));
+	  id->dst_simt_vars->add (copy);
+	}
+    }
 
   return copy;
 }
diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h
index 88b3286..cf46fa5 100644
--- a/gcc/tree-inline.h
+++ b/gcc/tree-inline.h
@@ -145,6 +145,10 @@ struct copy_body_data
      equivalents in the function into which it is being inlined.  */
   hash_map<dependence_hash, unsigned short> *dependence_map;
 
+  /* A set of local variables in the function that is being inlined into
+     an OpenMP SIMD-on-SIMT loop.  */
+  hash_set<tree> *dst_simt_vars;
+
   /* Cilk keywords currently need to replace some variables that
      ordinary nested functions do not.  */
   bool remap_var_for_cilk;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions
@ 2017-03-22 15:46 Alexander Monakov
  2017-03-22 15:46 ` [PATCH 1/5] nvptx: implement SIMT enter/exit insns Alexander Monakov
                   ` (5 more replies)
  0 siblings, 6 replies; 27+ messages in thread
From: Alexander Monakov @ 2017-03-22 15:46 UTC (permalink / raw)
  To: gcc-patches

Hello,

This patchset implements privatization of addressable variables in OpenMP SIMD
regions lowered for SIMT targets (i.e. NVPTX) via the approach identified in
the review of the previous submission.

Now instead of explicitly privatizing those variables as fields of an
allocated struct up front, we keep them as normal variables in the IR until
after IPA passes.  After that, the ompdevlow pass rewrites the IR to make
privatization explicit (patch 3/5).

Inlining is taught to privatize variables this way (patch 4), and when a
variable no longer has its address taken, it can be promoted to a gimple
register and no longer be subject to special privatization (patch 5).

Post-omplow IR looks like this:

  void *simtrec;
  int priv1 __attribute__((omp simt private));

  simduid.n_2 = GOMP_SIMT_ENTER (simduid.n_1, &priv1, &priv2, ...);
  simtrec = GOMP_SIMT_ENTER_ALLOC (simduid.n_2);

  for (...) { foo (&priv1); }

  priv1 = {CLOBBER};
  GOMP_SIMT_EXIT (simtrec);

And post-ompdevlow IR looks like this:

  struct {
    int priv1;
  } *simtrec;
  int priv1 [value-expr: simtrec->priv1];
  /* priv1 is no longer itself present in IR */

  simduid.n_2 = simduid.n_1;
  simtrec = GOMP_SIMT_ENTER_ALLOC (sizeof *simtrec, alignof *simtrec);

  for (...) { foo (&simtrec->priv1); }

  *simtrec = {CLOBBER};
  GOMP_SIMT_EXIT (simtrec);

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 1/5] nvptx: implement SIMT enter/exit insns
  2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
@ 2017-03-22 15:46 ` Alexander Monakov
  2017-03-27 11:12   ` Alexander Monakov
  2017-03-22 15:46 ` [PATCH 5/5] address-taken: optimize SIMT privatized variables Alexander Monakov
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-22 15:46 UTC (permalink / raw)
  To: gcc-patches

This patch adds handling of new omp_simt_enter/omp_simt_exit named insns
in the NVPTX backend.

	* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
        (nvptx_output_simt_exit): Declare.
        * config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
        cfun->machine->unisimt_location.  Handle NULL unisimt_predicate.
        (init_softstack_frame): Move initialization of crtl->is_leaf to...
        (nvptx_declare_function_name): ...here.  Emit declaration of local
        memory space buffer for omp_simt_enter insn.
        (nvptx_output_unisimt_switch): New.
        (nvptx_output_softstack_switch): New.
        (nvptx_output_simt_enter): New.
        (nvptx_output_simt_exit): New.
        * config/nvptx/nvptx.h (struct machine_function): New fields
        has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
        * config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
        (UNSPECV_SIMT_EXIT): Ditto.
        (omp_simt_enter_insn): New insn.
        (omp_simt_enter): New expansion.
        (omp_simt_exit): New insn.
        * config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.


---
 gcc/config/nvptx/nvptx-protos.h |   2 +
 gcc/config/nvptx/nvptx.c        | 163 +++++++++++++++++++++++++++++++++++-----
 gcc/config/nvptx/nvptx.h        |   6 ++
 gcc/config/nvptx/nvptx.md       |  39 ++++++++++
 gcc/config/nvptx/nvptx.opt      |   4 +
 5 files changed, 196 insertions(+), 18 deletions(-)

diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index aaea3ba..16b316f 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -53,5 +53,7 @@ extern const char *nvptx_output_mov_insn (rtx, rtx);
 extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
 extern const char *nvptx_output_return (void);
 extern const char *nvptx_output_set_softstack (unsigned);
+extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
+extern const char *nvptx_output_simt_exit (rtx);
 #endif
 #endif
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index 647855c..83f4610 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -1048,11 +1048,6 @@ init_softstack_frame (FILE *file, unsigned alignment, HOST_WIDE_INT size)
   fprintf (file, "\t\tsub.u%d %s, %s, " HOST_WIDE_INT_PRINT_DEC ";\n",
 	   bits, reg_stack, reg_frame, size);
 
-  /* Usually 'crtl->is_leaf' is computed during register allocator
-     initialization (which is not done on NVPTX) or for pressure-sensitive
-     optimizations.  Initialize it here, except if already set.  */
-  if (!crtl->is_leaf)
-    crtl->is_leaf = leaf_function_p ();
   if (!crtl->is_leaf)
     fprintf (file, "\t\tst.shared.u%d [%s], %s;\n",
 	     bits, reg_sspslot, reg_stack);
@@ -1080,24 +1075,29 @@ nvptx_init_axis_predicate (FILE *file, int regno, const char *name)
 static void
 nvptx_init_unisimt_predicate (FILE *file)
 {
+  cfun->machine->unisimt_location = gen_reg_rtx (Pmode);
+  int loc = REGNO (cfun->machine->unisimt_location);
   int bits = POINTER_SIZE;
-  int master = REGNO (cfun->machine->unisimt_master);
-  int pred = REGNO (cfun->machine->unisimt_predicate);
+  fprintf (file, "\t.reg.u%d %%r%d;\n", bits, loc);
   fprintf (file, "\t{\n");
   fprintf (file, "\t\t.reg.u32 %%ustmp0;\n");
   fprintf (file, "\t\t.reg.u%d %%ustmp1;\n", bits);
-  fprintf (file, "\t\t.reg.u%d %%ustmp2;\n", bits);
   fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.y;\n");
   fprintf (file, "\t\tmul%s.u32 %%ustmp1, %%ustmp0, 4;\n",
 	   bits == 64 ? ".wide" : ".lo");
-  fprintf (file, "\t\tmov.u%d %%ustmp2, __nvptx_uni;\n", bits);
-  fprintf (file, "\t\tadd.u%d %%ustmp2, %%ustmp2, %%ustmp1;\n", bits);
-  fprintf (file, "\t\tld.shared.u32 %%r%d, [%%ustmp2];\n", master);
-  fprintf (file, "\t\tmov.u32 %%ustmp0, %%tid.x;\n");
-  /* Compute 'master lane index' as 'tid.x & __nvptx_uni[tid.y]'.  */
-  fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
-  /* Compute predicate as 'tid.x == master'.  */
-  fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
+  fprintf (file, "\t\tmov.u%d %%r%d, __nvptx_uni;\n", bits, loc);
+  fprintf (file, "\t\tadd.u%d %%r%d, %%r%d, %%ustmp1;\n", bits, loc, loc);
+  if (cfun->machine->unisimt_predicate)
+    {
+      int master = REGNO (cfun->machine->unisimt_master);
+      int pred = REGNO (cfun->machine->unisimt_predicate);
+      fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
+      fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
+      /* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'.  */
+      fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
+      /* Compute predicate as 'tid.x == master'.  */
+      fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp0;\n", pred, master);
+    }
   fprintf (file, "\t}\n");
   need_unisimt_decl = true;
 }
@@ -1224,6 +1224,12 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
 
   fprintf (file, "%s", s.str().c_str());
 
+  /* Usually 'crtl->is_leaf' is computed during register allocator
+     initialization (which is not done on NVPTX) or for pressure-sensitive
+     optimizations.  Initialize it here, except if already set.  */
+  if (!crtl->is_leaf)
+    crtl->is_leaf = leaf_function_p ();
+
   HOST_WIDE_INT sz = get_frame_size ();
   bool need_frameptr = sz || cfun->machine->has_chain;
   int alignment = crtl->stack_alignment_needed / BITS_PER_UNIT;
@@ -1240,9 +1246,28 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
 	init_frame (file, FRAME_POINTER_REGNUM, alignment,
 		    ROUND_UP (sz, GET_MODE_SIZE (DImode)));
     }
-  else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca)
+  else if (need_frameptr || cfun->machine->has_varadic || cfun->calls_alloca
+	   || (cfun->machine->has_simtreg && !crtl->is_leaf))
     init_softstack_frame (file, alignment, sz);
 
+  if (cfun->machine->has_simtreg)
+    {
+      unsigned HOST_WIDE_INT &simtsz = cfun->machine->simt_stack_size;
+      unsigned HOST_WIDE_INT &align = cfun->machine->simt_stack_align;
+      align = MAX (align, GET_MODE_SIZE (DImode));
+      if (!crtl->is_leaf || cfun->calls_alloca)
+	simtsz = HOST_WIDE_INT_M1U;
+      if (simtsz == HOST_WIDE_INT_M1U)
+	simtsz = nvptx_softstack_size;
+      if (cfun->machine->has_softstack)
+	simtsz += POINTER_SIZE / 8;
+      simtsz = ROUND_UP (simtsz, GET_MODE_SIZE (DImode));
+      if (align > GET_MODE_SIZE (DImode))
+	simtsz += align - GET_MODE_SIZE (DImode);
+      if (simtsz)
+	fprintf (file, "\t.local.align 8 .b8 %%simtstack_ar["
+		HOST_WIDE_INT_PRINT_DEC "];\n", simtsz);
+    }
   /* Declare the pseudos we have as ptx registers.  */
   int maxregs = max_reg_num ();
   for (int i = LAST_VIRTUAL_REGISTER + 1; i < maxregs; i++)
@@ -1267,10 +1292,112 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
   if (cfun->machine->axis_predicate[1])
     nvptx_init_axis_predicate (file,
 			       REGNO (cfun->machine->axis_predicate[1]), "x");
-  if (cfun->machine->unisimt_predicate)
+  if (cfun->machine->unisimt_predicate
+      || (cfun->machine->has_simtreg && !crtl->is_leaf))
     nvptx_init_unisimt_predicate (file);
 }
 
+/* Output code for switching uniform-simt state.  ENTERING indicates whether
+   we are entering or leaving non-uniform execution region.  */
+
+static void
+nvptx_output_unisimt_switch (FILE *file, bool entering)
+{
+  if (crtl->is_leaf && !cfun->machine->unisimt_predicate)
+    return;
+  fprintf (file, "\t{\n");
+  fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
+  fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
+  if (!crtl->is_leaf)
+    {
+      int loc = REGNO (cfun->machine->unisimt_location);
+      fprintf (file, "\t\tst.shared.u32 [%%r%d], %%ustmp2;\n", loc);
+    }
+  if (cfun->machine->unisimt_predicate)
+    {
+      int master = REGNO (cfun->machine->unisimt_master);
+      int pred = REGNO (cfun->machine->unisimt_predicate);
+      fprintf (file, "\t\tmov.u32 %%ustmp2, %%laneid;\n");
+      fprintf (file, "\t\tmov.u32 %%r%d, %s;\n",
+	       master, entering ? "%ustmp2" : "0");
+      fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, %%ustmp2;\n", pred, master);
+    }
+  fprintf (file, "\t}\n");
+}
+
+/* Output code for allocating per-lane storage and switching soft-stack pointer.
+   ENTERING indicates whether we are entering or leaving non-uniform execution.
+   PTR is the register pointing to allocated storage, it is assigned to on
+   entering and used to restore state on leaving.  SIZE and ALIGN are used only
+   on entering.  */
+
+static void
+nvptx_output_softstack_switch (FILE *file, bool entering,
+			       rtx ptr, rtx size, rtx align)
+{
+  gcc_assert (REG_P (ptr) && !HARD_REGISTER_P (ptr));
+  if (crtl->is_leaf && !cfun->machine->simt_stack_size)
+    return;
+  int bits = POINTER_SIZE, regno = REGNO (ptr);
+  fprintf (file, "\t{\n");
+  if (entering)
+    {
+      fprintf (file, "\t\tcvta.local.u%d %%r%d, %%simtstack_ar + "
+	       HOST_WIDE_INT_PRINT_DEC ";\n", bits, regno,
+	       cfun->machine->simt_stack_size);
+      fprintf (file, "\t\tsub.u%d %%r%d, %%r%d, ", bits, regno, regno);
+      if (CONST_INT_P (size))
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+		 ROUND_UP (UINTVAL (size), GET_MODE_SIZE (DImode)));
+      else
+	output_reg (file, REGNO (size), VOIDmode);
+      fputs (";\n", file);
+      if (!CONST_INT_P (size) || UINTVAL (align) > GET_MODE_SIZE (DImode))
+	fprintf (file, "\t\tand.u%d %%r%d, %%r%d, -%d;\n",
+		 bits, regno, regno, UINTVAL (align));
+    }
+  if (cfun->machine->has_softstack)
+    {
+      const char *reg_stack = reg_names[STACK_POINTER_REGNUM];
+      if (entering)
+	{
+	  fprintf (file, "\t\tst.u%d [%%r%d + -%d], %s;\n",
+		   bits, regno, bits / 8, reg_stack);
+	  fprintf (file, "\t\tsub.u%d %s, %%r%d, %d;\n",
+		   bits, reg_stack, regno, bits / 8);
+	}
+      else
+	{
+	  fprintf (file, "\t\tld.u%d %s, [%%r%d + -%d];\n",
+		   bits, reg_stack, regno, bits / 8);
+	}
+      nvptx_output_set_softstack (REGNO (stack_pointer_rtx));
+    }
+  fprintf (file, "\t}\n");
+}
+
+/* Output code to enter non-uniform execution region.  DEST is a register
+   to hold a per-lane allocation given by SIZE and ALIGN.  */
+
+const char *
+nvptx_output_simt_enter (rtx dest, rtx size, rtx align)
+{
+  nvptx_output_unisimt_switch (asm_out_file, true);
+  nvptx_output_softstack_switch (asm_out_file, true, dest, size, align);
+  return "";
+}
+
+/* Output code to leave non-uniform execution region.  SRC is the register
+   holding per-lane storage previously allocated by omp_simt_enter insn.  */
+
+const char *
+nvptx_output_simt_exit (rtx src)
+{
+  nvptx_output_unisimt_switch (asm_out_file, false);
+  nvptx_output_softstack_switch (asm_out_file, false, src, NULL_RTX, NULL_RTX);
+  return "";
+}
+
 /* Output instruction that sets soft stack pointer in shared memory to the
    value in register given by SRC_REGNO.  */
 
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 8338d4e..0a000a7 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -213,12 +213,18 @@ struct GTY(()) machine_function
   bool has_varadic;  /* Current function has a varadic call.  */
   bool has_chain; /* Current function has outgoing static chain.  */
   bool has_softstack; /* Current function has a soft stack frame.  */
+  bool has_simtreg; /* Current function has an OpenMP SIMD region.  */
   int num_args;	/* Number of args of current call.  */
   int return_mode; /* Return mode of current fn.
 		      (machine_mode not defined yet.) */
   rtx axis_predicate[2]; /* Neutering predicates.  */
   rtx unisimt_master; /* 'Master lane index' for -muniform-simt.  */
   rtx unisimt_predicate; /* Predicate for -muniform-simt.  */
+  rtx unisimt_location; /* Mask location for -muniform-simt.  */
+  /* The following two fields hold the maximum size resp. alignment required
+     for per-lane storage in OpenMP SIMD regions.  */
+  unsigned HOST_WIDE_INT simt_stack_size;
+  unsigned HOST_WIDE_INT simt_stack_align;
 };
 #endif
 \f
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 50dd42e..f2ed63b 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -63,6 +63,9 @@ (define_c_enum "unspecv" [
    UNSPECV_JOIN
 
    UNSPECV_NOUNROLL
+
+   UNSPECV_SIMT_ENTER
+   UNSPECV_SIMT_EXIT
 ])
 
 (define_attr "subregs_ok" "false,true"
@@ -1184,6 +1187,42 @@ (define_insn "nvptx_vote_ballot"
 
 ;; Patterns for OpenMP SIMD-via-SIMT lowering
 
+(define_insn "omp_simt_enter_insn"
+  [(set (match_operand 0 "nvptx_register_operand" "=R")
+	(unspec_volatile [(match_operand 1 "nvptx_nonmemory_operand" "Ri")
+			    (match_operand 2 "nvptx_nonmemory_operand" "Ri")]
+			   UNSPECV_SIMT_ENTER))]
+  ""
+{
+  return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
+})
+
+(define_expand "omp_simt_enter"
+  [(match_operand 0 "nvptx_register_operand" "=R")
+   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
+   (match_operand 2 "const_int_operand" "n")]
+  ""
+{
+  if (!CONST_INT_P (operands[1]))
+    cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
+  else
+    cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
+					  cfun->machine->simt_stack_size);
+  cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
+					 cfun->machine->simt_stack_align);
+  cfun->machine->has_simtreg = true;
+  emit_insn (gen_omp_simt_enter_insn (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_insn "omp_simt_exit"
+  [(unspec_volatile [(match_operand 0 "nvptx_register_operand" "R")]
+		    UNSPECV_SIMT_EXIT)]
+  ""
+{
+  return nvptx_output_simt_exit (operands[0]);
+})
+
 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
 (define_insn "omp_simt_lane"
   [(set (match_operand:SI 0 "nvptx_register_operand" "")
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index 80aab5b..901def7 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -37,6 +37,10 @@ msoft-stack
 Target Report Mask(SOFT_STACK)
 Use custom stacks instead of local memory for automatic storage.
 
+msoft-stack-reserve-local
+Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128)
+Specify size of .local memory used for stack when the exact amount is not known.
+
 muniform-simt
 Target Report Mask(UNIFORM_SIMT)
 Generate code that can keep local state uniform across all lanes.
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 5/5] address-taken: optimize SIMT privatized variables
  2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
  2017-03-22 15:46 ` [PATCH 1/5] nvptx: implement SIMT enter/exit insns Alexander Monakov
@ 2017-03-22 15:46 ` Alexander Monakov
  2017-03-23 10:48   ` Jakub Jelinek
  2017-03-22 15:46 ` [PATCH 3/5] omp-offload: implement SIMT privatization, part 2 Alexander Monakov
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-22 15:46 UTC (permalink / raw)
  To: gcc-patches

This patch implements promotion of SIMT private variables if GOMP_SIMT_ENTER
is the only remaining statement where their address is taken, by handling it
similar to ASAN_MARK.

To avoid rebuilding GOMP_SIMT_ENTER statement from scratch, set argument
slot to a null pointer when the corresponding variable is optimized.

	* tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER.

---
 gcc/tree-ssa.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
index 831fd61..42e708e 100644
--- a/gcc/tree-ssa.c
+++ b/gcc/tree-ssa.c
@@ -1654,7 +1654,8 @@ execute_update_addresses_taken (void)
 		  gimple_ior_addresses_taken (addresses_taken, stmt);
 		  gimple_call_set_arg (stmt, 1, arg);
 		}
-	      else if (is_asan_mark_p (stmt))
+	      else if (is_asan_mark_p (stmt)
+		       || gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
 		;
 	      else
 		gimple_ior_addresses_taken (addresses_taken, stmt);
@@ -1940,6 +1941,18 @@ execute_update_addresses_taken (void)
 			continue;
 		      }
 		  }
+		else if (gimple_call_internal_p (stmt, IFN_GOMP_SIMT_ENTER))
+		  for (i = 1; i < gimple_call_num_args (stmt); i++)
+		    {
+		      tree *argp = gimple_call_arg_ptr (stmt, i);
+		      if (*argp == null_pointer_node)
+			continue;
+		      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
+				  && VAR_P (TREE_OPERAND (*argp, 0)));
+		      tree var = TREE_OPERAND (*argp, 0);
+		      if (bitmap_bit_p (suitable_for_renaming, DECL_UID (var)))
+			*argp = null_pointer_node;
+		    }
 		for (i = 0; i < gimple_call_num_args (stmt); ++i)
 		  {
 		    tree *argp = gimple_call_arg_ptr (stmt, i);
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 2/5] omp-low: implement SIMT privatization, part 1
  2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
                   ` (2 preceding siblings ...)
  2017-03-22 15:46 ` [PATCH 3/5] omp-offload: implement SIMT privatization, part 2 Alexander Monakov
@ 2017-03-22 15:46 ` Alexander Monakov
  2017-03-23 10:32   ` Jakub Jelinek
  2017-03-22 15:46 ` [PATCH 4/5] tree-inline: implement SIMT privatization, part 3 Alexander Monakov
  2017-03-31 10:22 ` [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Thomas Schwinge
  5 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-22 15:46 UTC (permalink / raw)
  To: gcc-patches

This patch adjusts privatization in OpenMP SIMD loops lowered for SIMT targets.
At lowering time, private variables receive "omp simt private" attribute, get
mentioned in argument list of GOMP_SIMT_ENTER function, and get a clobbering
assignment just prior to GOMP_SIMT_EXIT function.

The following patch will implement the second step: privatized variables are
converted to fields of a struct allocated by a call to GOMP_SIMT_ENTER_ALLOC.
This function is similar to __builtin_alloca_with_align, except that it
obtains per-SIMT-lane storage and implicitly performs target-specific actions;
on NVPTX that means a transition to per-lane softstacks and inverting the
uniform-simt mask.


	* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
        (expand_GOMP_SIMT_ENTER_ALLOC): New.
        (expand_GOMP_SIMT_EXIT): New.
        * internal-fn.def (GOMP_SIMT_ENTER): New internal function.
        (GOMP_SIMT_ENTER_ALLOC): Ditto.
        (GOMP_SIMT_EXIT): Ditto.
        * target-insns.def (omp_simt_enter): New insn.
        (omp_simt_exit): Ditto.
        * omp-low.c (struct omplow_simd_context): New fields simt_eargs,
        simt_dlist.
        (lower_rec_simd_input_clauses): Implement SIMT privatization.
        (lower_rec_input_clauses): Likewise.
        (lower_lastprivate_clauses): Handle SIMT privatization.

---
 gcc/internal-fn.c    |  42 ++++++++++++++++
 gcc/internal-fn.def  |   3 ++
 gcc/omp-low.c        | 133 +++++++++++++++++++++++++++++++++++++--------------
 gcc/target-insns.def |   2 +
 4 files changed, 143 insertions(+), 37 deletions(-)

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index df7b930..75fe027 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -166,6 +166,48 @@ expand_GOMP_USE_SIMT (internal_fn, gcall *)
   gcc_unreachable ();
 }
 
+/* This should get expanded in omp_device_lower pass.  */
+
+static void
+expand_GOMP_SIMT_ENTER (internal_fn, gcall *)
+{
+  gcc_unreachable ();
+}
+
+/* Allocate per-lane storage and begin non-uniform execution region.  */
+
+static void
+expand_GOMP_SIMT_ENTER_ALLOC (internal_fn, gcall *stmt)
+{
+  rtx target;
+  tree lhs = gimple_call_lhs (stmt);
+  if (lhs)
+    target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  else
+    target = gen_reg_rtx (Pmode);
+  rtx size = expand_normal (gimple_call_arg (stmt, 0));
+  rtx align = expand_normal (gimple_call_arg (stmt, 1));
+  struct expand_operand ops[3];
+  create_output_operand (&ops[0], target, Pmode);
+  create_input_operand (&ops[1], size, Pmode);
+  create_input_operand (&ops[2], align, Pmode);
+  gcc_assert (targetm.have_omp_simt_enter ());
+  expand_insn (targetm.code_for_omp_simt_enter, 3, ops);
+}
+
+/* Deallocate per-lane storage and leave non-uniform execution region.  */
+
+static void
+expand_GOMP_SIMT_EXIT (internal_fn, gcall *stmt)
+{
+  gcc_checking_assert (!gimple_call_lhs (stmt));
+  rtx arg = expand_normal (gimple_call_arg (stmt, 0));
+  struct expand_operand ops[1];
+  create_input_operand (&ops[0], arg, Pmode);
+  gcc_assert (targetm.have_omp_simt_exit ());
+  expand_insn (targetm.code_for_omp_simt_exit, 1, ops);
+}
+
 /* Lane index on SIMT targets: thread index in the warp on NVPTX.  On targets
    without SIMT execution this should be expanded in omp_device_lower pass.  */
 
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 2ba69c9..e162d81 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -142,6 +142,9 @@ DEF_INTERNAL_INT_FN (PARITY, ECF_CONST, parity, unary)
 DEF_INTERNAL_INT_FN (POPCOUNT, ECF_CONST, popcount, unary)
 
 DEF_INTERNAL_FN (GOMP_USE_SIMT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_ENTER_ALLOC, ECF_LEAF | ECF_NOTHROW, NULL)
+DEF_INTERNAL_FN (GOMP_SIMT_EXIT, ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_VF, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (GOMP_SIMT_LAST_LANE, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index c2c69cb..4199668 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3457,6 +3457,8 @@ omp_clause_aligned_alignment (tree clause)
 struct omplow_simd_context {
   tree idx;
   tree lane;
+  vec<tree, va_heap> simt_eargs;
+  gimple_seq simt_dlist;
   int max_vf;
   bool is_simt;
 };
@@ -3492,18 +3494,39 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
   if (sctx->max_vf == 1)
     return false;
 
-  tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
-  tree avar = create_tmp_var_raw (atype);
-  if (TREE_ADDRESSABLE (new_var))
-    TREE_ADDRESSABLE (avar) = 1;
-  DECL_ATTRIBUTES (avar)
-    = tree_cons (get_identifier ("omp simd array"), NULL,
-		 DECL_ATTRIBUTES (avar));
-  gimple_add_tmp_var (avar);
-  ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
-		 NULL_TREE, NULL_TREE);
-  lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
-		 NULL_TREE, NULL_TREE);
+  if (sctx->is_simt)
+    {
+      if (is_gimple_reg (new_var))
+	{
+	  ivar = lvar = new_var;
+	  return true;
+	}
+      tree type = TREE_TYPE (new_var), ptype = build_pointer_type (type);
+      ivar = lvar = create_tmp_var (type);
+      TREE_ADDRESSABLE (ivar) = 1;
+      DECL_ATTRIBUTES (ivar) = tree_cons (get_identifier ("omp simt private"),
+					  NULL, DECL_ATTRIBUTES (ivar));
+      sctx->simt_eargs.safe_push (build1 (ADDR_EXPR, ptype, ivar));
+      tree clobber = build_constructor (type, NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      gimple *g = gimple_build_assign (ivar, clobber);
+      gimple_seq_add_stmt (&sctx->simt_dlist, g);
+    }
+  else
+    {
+      tree atype = build_array_type_nelts (TREE_TYPE (new_var), sctx->max_vf);
+      tree avar = create_tmp_var_raw (atype);
+      if (TREE_ADDRESSABLE (new_var))
+	TREE_ADDRESSABLE (avar) = 1;
+      DECL_ATTRIBUTES (avar)
+	= tree_cons (get_identifier ("omp simd array"), NULL,
+		     DECL_ATTRIBUTES (avar));
+      gimple_add_tmp_var (avar);
+      ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->idx,
+		     NULL_TREE, NULL_TREE);
+      lvar = build4 (ARRAY_REF, TREE_TYPE (new_var), avar, sctx->lane,
+		     NULL_TREE, NULL_TREE);
+    }
   if (DECL_P (new_var))
     {
       SET_DECL_VALUE_EXPR (new_var, lvar);
@@ -3547,8 +3570,8 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
   bool is_simd = (gimple_code (ctx->stmt) == GIMPLE_OMP_FOR
 		  && gimple_omp_for_kind (ctx->stmt) & GF_OMP_FOR_SIMD);
   omplow_simd_context sctx = omplow_simd_context ();
-  tree simt_lane = NULL_TREE;
-  tree ivar = NULL_TREE, lvar = NULL_TREE;
+  tree simt_lane = NULL_TREE, simtrec = NULL_TREE;
+  tree ivar = NULL_TREE, lvar = NULL_TREE, uid = NULL_TREE;
   gimple_seq llist[3] = { };
 
   copyin_seq = NULL;
@@ -3581,6 +3604,10 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	  continue;
 	}
 
+  /* Add a placeholder for simduid.  */
+  if (sctx.is_simt && sctx.max_vf != 1)
+    sctx.simt_eargs.safe_push (NULL_TREE);
+
   /* Do all the fixed sized types in the first pass, and the variable sized
      types in the second pass.  This makes sure that the scalar arguments to
      the variable sized types are processed before we use them in the
@@ -4468,21 +4495,43 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	}
     }
 
-  if (sctx.lane)
+  if (sctx.max_vf == 1)
+    sctx.is_simt = false;
+
+  if (sctx.lane || sctx.is_simt)
     {
-      tree uid = create_tmp_var (ptr_type_node, "simduid");
+      uid = create_tmp_var (ptr_type_node, "simduid");
       /* Don't want uninit warnings on simduid, it is always uninitialized,
 	 but we use it not for the value, but for the DECL_UID only.  */
       TREE_NO_WARNING (uid) = 1;
+      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
+      OMP_CLAUSE__SIMDUID__DECL (c) = uid;
+      OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
+      gimple_omp_for_set_clauses (ctx->stmt, c);
+    }
+  /* Emit GOMP_SIMT_ENTER () to enter non-uniform execution and allocate
+     privatized data.  Initialize pointers to privatized instances.  */
+  if (sctx.is_simt)
+    {
+      sctx.simt_eargs[0] = uid;
+      gimple *g
+	= gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, sctx.simt_eargs);
+      gimple_call_set_lhs (g, uid);
+      gimple_seq_add_stmt (ilist, g);
+      sctx.simt_eargs.release ();
+
+      simtrec = create_tmp_var (pointer_type_node, ".omp_simt");
+      g = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 1, uid);
+      gimple_call_set_lhs (g, simtrec);
+      gimple_seq_add_stmt (ilist, g);
+    }
+  if (sctx.lane)
+    {
       gimple *g
 	= gimple_build_call_internal (IFN_GOMP_SIMD_LANE, 1, uid);
       gimple_call_set_lhs (g, sctx.lane);
       gimple_stmt_iterator gsi = gsi_start_1 (gimple_omp_body_ptr (ctx->stmt));
       gsi_insert_before_without_update (&gsi, g, GSI_SAME_STMT);
-      c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__SIMDUID_);
-      OMP_CLAUSE__SIMDUID__DECL (c) = uid;
-      OMP_CLAUSE_CHAIN (c) = gimple_omp_for_clauses (ctx->stmt);
-      gimple_omp_for_set_clauses (ctx->stmt, c);
       g = gimple_build_assign (sctx.lane, INTEGER_CST,
 			       build_int_cst (unsigned_type_node, 0));
       gimple_seq_add_stmt (ilist, g);
@@ -4545,6 +4594,13 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
 	    gimple_seq_add_stmt (seq, gimple_build_label (end));
 	  }
     }
+  if (sctx.is_simt)
+    {
+      gimple_seq_add_seq (dlist, sctx.simt_dlist);
+      gimple *g
+	= gimple_build_call_internal (IFN_GOMP_SIMT_EXIT, 1, simtrec);
+      gimple_seq_add_stmt (dlist, g);
+    }
 
   /* The copyin sequence is not to be executed by the main thread, since
      that would result in self-copies.  Perhaps not visible to scalars,
@@ -4715,7 +4771,8 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
 	    {
 	      tree val = DECL_VALUE_EXPR (new_var);
-	      if (TREE_CODE (val) == ARRAY_REF
+	      if (!maybe_simt
+		  && TREE_CODE (val) == ARRAY_REF
 		  && VAR_P (TREE_OPERAND (val, 0))
 		  && lookup_attribute ("omp simd array",
 				       DECL_ATTRIBUTES (TREE_OPERAND (val,
@@ -4734,24 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 		  new_var = build4 (ARRAY_REF, TREE_TYPE (val),
 				    TREE_OPERAND (val, 0), lastlane,
 				    NULL_TREE, NULL_TREE);
-		  if (maybe_simt)
+		}
+	      else if (maybe_simt
+		       && VAR_P (val)
+		       && lookup_attribute ("omp simt private",
+					    DECL_ATTRIBUTES (val)))
+		{
+		  if (simtlast == NULL)
 		    {
-		      gcall *g;
-		      if (simtlast == NULL)
-			{
-			  simtlast = create_tmp_var (unsigned_type_node);
-			  g = gimple_build_call_internal
-			    (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
-			  gimple_call_set_lhs (g, simtlast);
-			  gimple_seq_add_stmt (stmt_list, g);
-			}
-		      x = build_call_expr_internal_loc
-			(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
-			 TREE_TYPE (new_var), 2, new_var, simtlast);
-		      new_var = unshare_expr (new_var);
-		      gimplify_assign (new_var, x, stmt_list);
-		      new_var = unshare_expr (new_var);
+		      simtlast = create_tmp_var (unsigned_type_node);
+		      gcall *g = gimple_build_call_internal
+			(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
+		      gimple_call_set_lhs (g, simtlast);
+		      gimple_seq_add_stmt (stmt_list, g);
 		    }
+		  x = build_call_expr_internal_loc
+		    (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
+		     TREE_TYPE (val), 2, val, simtlast);
+		  new_var = unshare_expr (new_var);
+		  gimplify_assign (new_var, x, stmt_list);
+		  new_var = unshare_expr (new_var);
 		}
 	    }
 
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
index 2968c87..fb92f72 100644
--- a/gcc/target-insns.def
+++ b/gcc/target-insns.def
@@ -68,6 +68,8 @@ DEF_TARGET_INSN (oacc_dim_pos, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_dim_size, (rtx x0, rtx x1))
 DEF_TARGET_INSN (oacc_fork, (rtx x0, rtx x1, rtx x2))
 DEF_TARGET_INSN (oacc_join, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_enter, (rtx x0, rtx x1, rtx x2))
+DEF_TARGET_INSN (omp_simt_exit, (rtx x0))
 DEF_TARGET_INSN (omp_simt_lane, (rtx x0))
 DEF_TARGET_INSN (omp_simt_last_lane, (rtx x0, rtx x1))
 DEF_TARGET_INSN (omp_simt_ordered, (rtx x0, rtx x1))
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH 3/5] omp-offload: implement SIMT privatization, part 2
  2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
  2017-03-22 15:46 ` [PATCH 1/5] nvptx: implement SIMT enter/exit insns Alexander Monakov
  2017-03-22 15:46 ` [PATCH 5/5] address-taken: optimize SIMT privatized variables Alexander Monakov
@ 2017-03-22 15:46 ` Alexander Monakov
  2017-03-23 10:37   ` Jakub Jelinek
  2017-03-22 15:46 ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Alexander Monakov
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-22 15:46 UTC (permalink / raw)
  To: gcc-patches

This patch implements rewriting of SIMT private variables as fields of a
struct by setting DECL_VALUE_EXPR on them and regimplifying statements.

	* omp-offload.c: Include langhooks.h, tree-nested.h, stor-layout.h.
        (ompdevlow_adjust_simt_enter): New.
        (find_simtpriv_var_op): New.
        (execute_omp_device_lower): Handle IFN_GOMP_SIMT_ENTER,
        IFN_GOMP_SIMT_ENTER_ALLOC, IFN_GOMP_SIMT_EXIT.
---
 gcc/omp-offload.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)

diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
index d73955c..de27942 100644
--- a/gcc/omp-offload.c
+++ b/gcc/omp-offload.c
@@ -33,12 +33,15 @@ along with GCC; see the file COPYING3.  If not see
 #include "diagnostic-core.h"
 #include "fold-const.h"
 #include "internal-fn.h"
+#include "langhooks.h"
 #include "gimplify.h"
 #include "gimple-iterator.h"
 #include "gimplify-me.h"
 #include "gimple-walk.h"
 #include "tree-cfg.h"
 #include "tree-into-ssa.h"
+#include "tree-nested.h"
+#include "stor-layout.h"
 #include "common/common-target.h"
 #include "omp-general.h"
 #include "omp-offload.h"
@@ -1669,6 +1672,93 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
   return new pass_oacc_device_lower (ctxt);
 }
 
+\f
+
+/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
+   GOMP_SIMT_ENTER call identifying the privatized variables, which are
+   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
+   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */
+
+static void
+ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
+{
+  gimple *alloc_stmt = gsi_stmt (*gsi);
+  tree simtrec = gimple_call_lhs (alloc_stmt);
+  tree simduid = gimple_call_arg (alloc_stmt, 0);
+  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
+  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
+  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
+  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
+  TREE_ADDRESSABLE (rectype) = 1;
+  TREE_TYPE (simtrec) = build_pointer_type (rectype);
+  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
+    {
+      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
+      if (*argp == null_pointer_node)
+	continue;
+      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
+		  && VAR_P (TREE_OPERAND (*argp, 0)));
+      tree var = TREE_OPERAND (*argp, 0);
+
+      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
+			       DECL_NAME (var), TREE_TYPE (var));
+      SET_DECL_ALIGN (field, DECL_ALIGN (var));
+      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
+      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);
+
+      insert_field_into_struct (rectype, field);
+
+      tree t = build_simple_mem_ref (simtrec);
+      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
+      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
+      SET_DECL_VALUE_EXPR (var, t);
+      DECL_HAS_VALUE_EXPR_P (var) = 1;
+      *regimplify = true;
+    }
+  layout_type (rectype);
+  tree size = TYPE_SIZE_UNIT (rectype);
+  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));
+
+  alloc_stmt
+    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
+  gimple_call_set_lhs (alloc_stmt, simtrec);
+  gsi_replace (gsi, alloc_stmt, false);
+  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
+  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
+  gsi_replace (&enter_gsi, enter_stmt, false);
+
+  use_operand_p use;
+  gimple *exit_stmt;
+  if (single_imm_use (simtrec, &use, &exit_stmt))
+    {
+      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
+      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
+      tree clobber = build_constructor (rectype, NULL);
+      TREE_THIS_VOLATILE (clobber) = 1;
+      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
+      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
+    }
+  else
+    gcc_checking_assert (has_zero_uses (simtrec));
+}
+
+/* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables.  */
+
+static tree
+find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
+{
+  tree t = *tp;
+
+  if (VAR_P (t)
+      && DECL_HAS_VALUE_EXPR_P (t)
+      && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
+    {
+      *walk_subtrees = 0;
+      return t;
+    }
+  return NULL_TREE;
+}
+
 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
    VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
    LANE is kept to be expanded to RTL later on.  Also cleanup all other SIMT
@@ -1679,6 +1769,7 @@ static unsigned int
 execute_omp_device_lower ()
 {
   int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
+  bool regimplify = false;
   basic_block bb;
   gimple_stmt_iterator gsi;
   FOR_EACH_BB_FN (bb, cfun)
@@ -1694,6 +1785,20 @@ execute_omp_device_lower ()
 	  case IFN_GOMP_USE_SIMT:
 	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
 	    break;
+	  case IFN_GOMP_SIMT_ENTER:
+	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
+	    goto simtreg_enter_exit;
+	  case IFN_GOMP_SIMT_ENTER_ALLOC:
+	    if (vf != 1)
+	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
+	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
+	    goto simtreg_enter_exit;
+	  case IFN_GOMP_SIMT_EXIT:
+simtreg_enter_exit:
+	    if (vf != 1)
+	      continue;
+	    unlink_stmt_vdef (stmt);
+	    break;
 	  case IFN_GOMP_SIMT_LANE:
 	  case IFN_GOMP_SIMT_LAST_LANE:
 	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
@@ -1726,6 +1831,16 @@ execute_omp_device_lower ()
 	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
 	gsi_replace (&gsi, stmt, false);
       }
+  if (regimplify)
+    FOR_EACH_BB_REVERSE_FN (bb, cfun)
+      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
+	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
+	  {
+	    if (gimple_clobber_p (gsi_stmt (gsi)))
+	      gsi_remove (&gsi, true);
+	    else
+	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
+	  }
   if (vf != 1)
     cfun->has_force_vectorize_loops = false;
   return 0;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/5] omp-low: implement SIMT privatization, part 1
  2017-03-22 15:46 ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Alexander Monakov
@ 2017-03-23 10:32   ` Jakub Jelinek
  2017-03-31 16:05     ` Alexander Monakov
  0 siblings, 1 reply; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 10:32 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Wed, Mar 22, 2017 at 06:46:32PM +0300, Alexander Monakov wrote:
> This patch adjusts privatization in OpenMP SIMD loops lowered for SIMT targets.
> At lowering time, private variables receive "omp simt private" attribute, get
> mentioned in argument list of GOMP_SIMT_ENTER function, and get a clobbering
> assignment just prior to GOMP_SIMT_EXIT function.
> 
> The following patch will implement the second step: privatized variables are
> converted to fields of a struct allocated by a call to GOMP_SIMT_ENTER_ALLOC.
> This function is similar to __builtin_alloca_with_align, except that it
> obtains per-SIMT-lane storage and implicitly performs target-specific actions;
> on NVPTX that means a transition to per-lane softstacks and inverting the
> uniform-simt mask.

Ok for trunk (if all the other patches are acked).

> 	* internal-fn.c (expand_GOMP_SIMT_ENTER): New.
>         (expand_GOMP_SIMT_ENTER_ALLOC): New.
>         (expand_GOMP_SIMT_EXIT): New.
>         * internal-fn.def (GOMP_SIMT_ENTER): New internal function.
>         (GOMP_SIMT_ENTER_ALLOC): Ditto.
>         (GOMP_SIMT_EXIT): Ditto.
>         * target-insns.def (omp_simt_enter): New insn.
>         (omp_simt_exit): Ditto.
>         * omp-low.c (struct omplow_simd_context): New fields simt_eargs,
>         simt_dlist.
>         (lower_rec_simd_input_clauses): Implement SIMT privatization.
>         (lower_rec_input_clauses): Likewise.
>         (lower_lastprivate_clauses): Handle SIMT privatization.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] omp-offload: implement SIMT privatization, part 2
  2017-03-22 15:46 ` [PATCH 3/5] omp-offload: implement SIMT privatization, part 2 Alexander Monakov
@ 2017-03-23 10:37   ` Jakub Jelinek
  2017-03-23 10:53     ` Alexander Monakov
  0 siblings, 1 reply; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 10:37 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Wed, Mar 22, 2017 at 06:46:33PM +0300, Alexander Monakov wrote:
> @@ -1669,6 +1672,93 @@ make_pass_oacc_device_lower (gcc::context *ctxt)
>    return new pass_oacc_device_lower (ctxt);
>  }
>  
> +\f
> +

I'd avoid the empty line after ^L.

> @@ -1694,6 +1785,20 @@ execute_omp_device_lower ()
>  	  case IFN_GOMP_USE_SIMT:
>  	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
>  	    break;
> +	  case IFN_GOMP_SIMT_ENTER:
> +	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
> +	    goto simtreg_enter_exit;
> +	  case IFN_GOMP_SIMT_ENTER_ALLOC:
> +	    if (vf != 1)
> +	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
> +	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
> +	    goto simtreg_enter_exit;
> +	  case IFN_GOMP_SIMT_EXIT:
> +simtreg_enter_exit:

Please align the label below case, instead of start of the line.

> +	    if (vf != 1)
> +	      continue;
> +	    unlink_stmt_vdef (stmt);

This is weird.  AFAIK unlink_stmt_vdef just replaces the uses of the vdef
of that stmt with the vuse, but it still keeps the vdef (and vuse) around
on the stmt, typically it is used when you are removing that stmt, but
that is not the case here.  So why are you doing it and not say removing the
vdef?

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-22 15:46 ` [PATCH 4/5] tree-inline: implement SIMT privatization, part 3 Alexander Monakov
@ 2017-03-23 10:47   ` Jakub Jelinek
  2017-03-23 11:13     ` Alexander Monakov
  0 siblings, 1 reply; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 10:47 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Wed, Mar 22, 2017 at 06:46:34PM +0300, Alexander Monakov wrote:
> @@ -4730,6 +4746,25 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
>    if (cfun->gimple_df)
>      pt_solution_reset (&cfun->gimple_df->escaped);
>  
> +  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
> +  if (id->dst_simt_vars)
> +    {
> +      size_t nargs = gimple_call_num_args (simtenter_stmt);
> +      hash_set<tree> *vars = id->dst_simt_vars;
> +      auto_vec<tree> newargs (nargs + vars->elements ());
> +      for (size_t i = 0; i < nargs; i++)
> +	newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
> +      for (hash_set<tree>::iterator i = vars->begin (); i != vars->end (); ++i)
> +	newargs.quick_push (build1 (ADDR_EXPR,
> +				    build_pointer_type (TREE_TYPE (*i)), *i));

Traversing a hash table where the traversal affects code generation is
-fcompare-debug unfriendly.
Do you actually need a hash_set and not say just a vec of the vars?  I can't
find where you'd actually do any lookups there, just add and traverse.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 5/5] address-taken: optimize SIMT privatized variables
  2017-03-22 15:46 ` [PATCH 5/5] address-taken: optimize SIMT privatized variables Alexander Monakov
@ 2017-03-23 10:48   ` Jakub Jelinek
  0 siblings, 0 replies; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 10:48 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Wed, Mar 22, 2017 at 06:46:35PM +0300, Alexander Monakov wrote:
> This patch implements promotion of SIMT private variables if GOMP_SIMT_ENTER
> is the only remaining statement where their address is taken, by handling it
> similar to ASAN_MARK.
> 
> To avoid rebuilding GOMP_SIMT_ENTER statement from scratch, set argument
> slot to a null pointer when the corresponding variable is optimized.
> 
> 	* tree-ssa.c (execute_update_addresses_taken): Handle GOMP_SIMT_ENTER.

Ok for trunk (if the rest is approved).

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] omp-offload: implement SIMT privatization, part 2
  2017-03-23 10:37   ` Jakub Jelinek
@ 2017-03-23 10:53     ` Alexander Monakov
  2017-03-23 11:19       ` Jakub Jelinek
  0 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-23 10:53 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> > +	    if (vf != 1)
> > +	      continue;
> > +	    unlink_stmt_vdef (stmt);
> 
> This is weird.  AFAIK unlink_stmt_vdef just replaces the uses of the vdef
> of that stmt with the vuse, but it still keeps the vdef (and vuse) around
> on the stmt, typically it is used when you are removing that stmt, but
> that is not the case here.  So why are you doing it and not say removing the
> vdef?

Maybe I misunderstand your question, but actually the statement is removed
further below, when we break out of the switch:

        stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
        gsi_replace (&gsi, stmt, false);

The same tactic is already in place for cleaning up GOMP_SIMT_ORDERED_PRED.

Thus, there's just one place that actually replaces statements; the body of
the switch is only responsible for coming up with a suitable rhs and cleaning
up vdefs for those statements where we know they would be present.

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-23 10:47   ` Jakub Jelinek
@ 2017-03-23 11:13     ` Alexander Monakov
  2017-03-23 11:25       ` Jakub Jelinek
  0 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-23 11:13 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> On Wed, Mar 22, 2017 at 06:46:34PM +0300, Alexander Monakov wrote:
> > @@ -4730,6 +4746,25 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
> >    if (cfun->gimple_df)
> >      pt_solution_reset (&cfun->gimple_df->escaped);
> >  
> > +  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
> > +  if (id->dst_simt_vars)
> > +    {
> > +      size_t nargs = gimple_call_num_args (simtenter_stmt);
> > +      hash_set<tree> *vars = id->dst_simt_vars;
> > +      auto_vec<tree> newargs (nargs + vars->elements ());
> > +      for (size_t i = 0; i < nargs; i++)
> > +	newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
> > +      for (hash_set<tree>::iterator i = vars->begin (); i != vars->end (); ++i)
> > +	newargs.quick_push (build1 (ADDR_EXPR,
> > +				    build_pointer_type (TREE_TYPE (*i)), *i));
> 
> Traversing a hash table where the traversal affects code generation is
> -fcompare-debug unfriendly.
> Do you actually need a hash_set and not say just a vec of the vars?  I can't
> find where you'd actually do any lookups there, just add and traverse.

Sorry for missing the IR stability issue.  This code relies on dst_simt_vars
being a set and thus having no duplicate entries (so the implicit lookup when
adding an element is needed).

However, I think I was overly cautious: looking again, I think we can't enter
copy_decl_for_dup_finish twice with the same 'copy' VAR_DECL.  Changing it to a
vec should be fine then?

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 3/5] omp-offload: implement SIMT privatization, part 2
  2017-03-23 10:53     ` Alexander Monakov
@ 2017-03-23 11:19       ` Jakub Jelinek
  0 siblings, 0 replies; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 11:19 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Thu, Mar 23, 2017 at 01:53:37PM +0300, Alexander Monakov wrote:
> On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> > > +	    if (vf != 1)
> > > +	      continue;
> > > +	    unlink_stmt_vdef (stmt);
> > 
> > This is weird.  AFAIK unlink_stmt_vdef just replaces the uses of the vdef
> > of that stmt with the vuse, but it still keeps the vdef (and vuse) around
> > on the stmt, typically it is used when you are removing that stmt, but
> > that is not the case here.  So why are you doing it and not say removing the
> > vdef?
> 
> Maybe I misunderstand your question, but actually the statement is removed
> further below, when we break out of the switch:

Ah, ok, missed that.  Thus, the patch is ok with those 2 nits fixed.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-23 11:13     ` Alexander Monakov
@ 2017-03-23 11:25       ` Jakub Jelinek
  2017-03-23 16:15         ` Alexander Monakov
  0 siblings, 1 reply; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 11:25 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Thu, Mar 23, 2017 at 02:13:45PM +0300, Alexander Monakov wrote:
> On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> > On Wed, Mar 22, 2017 at 06:46:34PM +0300, Alexander Monakov wrote:
> > > @@ -4730,6 +4746,25 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
> > >    if (cfun->gimple_df)
> > >      pt_solution_reset (&cfun->gimple_df->escaped);
> > >  
> > > +  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
> > > +  if (id->dst_simt_vars)
> > > +    {
> > > +      size_t nargs = gimple_call_num_args (simtenter_stmt);
> > > +      hash_set<tree> *vars = id->dst_simt_vars;
> > > +      auto_vec<tree> newargs (nargs + vars->elements ());
> > > +      for (size_t i = 0; i < nargs; i++)
> > > +	newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
> > > +      for (hash_set<tree>::iterator i = vars->begin (); i != vars->end (); ++i)
> > > +	newargs.quick_push (build1 (ADDR_EXPR,
> > > +				    build_pointer_type (TREE_TYPE (*i)), *i));
> > 
> > Traversing a hash table where the traversal affects code generation is
> > -fcompare-debug unfriendly.
> > Do you actually need a hash_set and not say just a vec of the vars?  I can't
> > find where you'd actually do any lookups there, just add and traverse.
> 
> Sorry for missing the IR stability issue.  This code relies on dst_simt_vars
> being a set and thus having no duplicate entries (so the implicit lookup when
> adding an element is needed).
> 
> However, I think I was overly cautious: looking again, I think we can't enter
> copy_decl_for_dup_finish twice with the same 'copy' VAR_DECL.  Changing it to a
> vec should be fine then?

Yeah, callers of copy_decl* should look the orig var in the decl map first,
plus if you look at all copy_decl_for_dup_finish callers, all of them first
create a new tree (usually copy_node) and then pass it as copy to
copy_decl_for_dup_finish, so you'll never get the same copy in there.

So, please change it into a vector.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-23 11:25       ` Jakub Jelinek
@ 2017-03-23 16:15         ` Alexander Monakov
  2017-03-23 16:23           ` Jakub Jelinek
  0 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-23 16:15 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5778 bytes --]

On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> > Sorry for missing the IR stability issue.  This code relies on dst_simt_vars
> > being a set and thus having no duplicate entries (so the implicit lookup when
> > adding an element is needed).
> > 
> > However, I think I was overly cautious: looking again, I think we can't enter
> > copy_decl_for_dup_finish twice with the same 'copy' VAR_DECL.  Changing it to a
> > vec should be fine then?
> 
> Yeah, callers of copy_decl* should look the orig var in the decl map first,
> plus if you look at all copy_decl_for_dup_finish callers, all of them first
> create a new tree (usually copy_node) and then pass it as copy to
> copy_decl_for_dup_finish, so you'll never get the same copy in there.
> 
> So, please change it into a vector.

Thanks — here's the updated patch.  I've also noticed there's no need to rebuild
the existing SIMT_ENTER statement if we didn't add any new privatized variables.

	* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
        * tree-inline.c (expand_call_inline): Handle SIMT privatization.
        (copy_decl_for_dup_finish): Ditto.
---
 gcc/tree-inline.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++------
 gcc/tree-inline.h |  4 ++++
 2 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 6b6d489..a84e569 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4385,6 +4385,11 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   gcall *call_stmt;
   unsigned int i;
   unsigned int prop_mask, src_properties;
+  struct function *dst_cfun;
+  tree simduid;
+  use_operand_p use;
+  gimple *simtenter_stmt = NULL;
+  vec<tree> *simtvars_st = NULL;
 
   /* The gimplifier uses input_location in too many places, such as
      internal_get_tmp_var ().  */
@@ -4588,15 +4593,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   id->src_cfun = DECL_STRUCT_FUNCTION (fn);
   id->call_stmt = call_stmt;
 
+  /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
+     variables to be added to IFN_GOMP_SIMT_ENTER argument list.  */
+  dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
+  if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
+      && (simduid = bb->loop_father->simduid) != NULL_TREE
+      && (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
+      && single_imm_use (simduid, &use, &simtenter_stmt)
+      && is_gimple_call (simtenter_stmt)
+      && gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
+    {
+      simtvars_st = id->dst_simt_vars;
+      vec_alloc (id->dst_simt_vars, 0);
+    }
+
   /* If the src function contains an IFN_VA_ARG, then so will the dst
      function after inlining.  Likewise for IFN_GOMP_USE_SIMT.  */
   prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
   src_properties = id->src_cfun->curr_properties & prop_mask;
   if (src_properties != prop_mask)
-    {
-      struct function *dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
-      dst_cfun->curr_properties &= src_properties | ~prop_mask;
-    }
+    dst_cfun->curr_properties &= src_properties | ~prop_mask;
 
   gcc_assert (!id->src_cfun->after_inlining);
 
@@ -4730,6 +4746,31 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   if (cfun->gimple_df)
     pt_solution_reset (&cfun->gimple_df->escaped);
 
+  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
+  if (id->dst_simt_vars)
+    {
+      if (id->dst_simt_vars->length () > 0)
+	{
+	  size_t nargs = gimple_call_num_args (simtenter_stmt);
+	  vec<tree> *vars = id->dst_simt_vars;
+	  auto_vec<tree> newargs (nargs + vars->length ());
+	  for (size_t i = 0; i < nargs; i++)
+	    newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
+	  for (tree *pvar = vars->begin (); pvar != vars->end (); pvar++)
+	    {
+	      tree ptrtype = build_pointer_type (TREE_TYPE (*pvar));
+	      newargs.quick_push (build1 (ADDR_EXPR, ptrtype, *pvar));
+	    }
+	  gcall *g
+	    = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
+	  gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
+	  gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
+	  gsi_replace (&gsi, g, false);
+	}
+      vec_free (id->dst_simt_vars);
+      id->dst_simt_vars = simtvars_st;
+    }
+
   /* Clean up.  */
   if (id->debug_map)
     {
@@ -5453,9 +5494,19 @@ copy_decl_for_dup_finish (copy_body_data *id, tree decl, tree copy)
        function.  */
     ;
   else
-    /* Ordinary automatic local variables are now in the scope of the
-       new function.  */
-    DECL_CONTEXT (copy) = id->dst_fn;
+    {
+      /* Ordinary automatic local variables are now in the scope of the
+	 new function.  */
+      DECL_CONTEXT (copy) = id->dst_fn;
+      if (VAR_P (copy) && id->dst_simt_vars && !is_gimple_reg (copy))
+	{
+	  if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (copy)))
+	    DECL_ATTRIBUTES (copy)
+	      = tree_cons (get_identifier ("omp simt private"), NULL,
+			   DECL_ATTRIBUTES (copy));
+	  id->dst_simt_vars->safe_push (copy);
+	}
+    }
 
   return copy;
 }
diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h
index 88b3286..ffb8333 100644
--- a/gcc/tree-inline.h
+++ b/gcc/tree-inline.h
@@ -145,6 +145,10 @@ struct copy_body_data
      equivalents in the function into which it is being inlined.  */
   hash_map<dependence_hash, unsigned short> *dependence_map;
 
+  /* A list of addressable local variables remapped into the caller
+     when inlining a call within an OpenMP SIMD-on-SIMT loop.  */
+  vec<tree> *dst_simt_vars;
+
   /* Cilk keywords currently need to replace some variables that
      ordinary nested functions do not.  */
   bool remap_var_for_cilk;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-23 16:15         ` Alexander Monakov
@ 2017-03-23 16:23           ` Jakub Jelinek
  2017-03-23 17:02             ` Alexander Monakov
  0 siblings, 1 reply; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 16:23 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Thu, Mar 23, 2017 at 07:15:52PM +0300, Alexander Monakov wrote:
> 	* tree-inline.h (struct copy_body_data): New field dst_simt_vars.
>         * tree-inline.c (expand_call_inline): Handle SIMT privatization.
>         (copy_decl_for_dup_finish): Ditto.
> ---
>  gcc/tree-inline.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++------
>  gcc/tree-inline.h |  4 ++++
>  2 files changed, 62 insertions(+), 7 deletions(-)
> 
> @@ -4588,15 +4593,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
>    id->src_cfun = DECL_STRUCT_FUNCTION (fn);
>    id->call_stmt = call_stmt;
>  
> +  /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
> +     variables to be added to IFN_GOMP_SIMT_ENTER argument list.  */
> +  dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
> +  if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
> +      && (simduid = bb->loop_father->simduid) != NULL_TREE
> +      && (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
> +      && single_imm_use (simduid, &use, &simtenter_stmt)
> +      && is_gimple_call (simtenter_stmt)
> +      && gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
> +    {
> +      simtvars_st = id->dst_simt_vars;
> +      vec_alloc (id->dst_simt_vars, 0);
> +    }

One more thing.  If the above if condition is false, you keep
id->dst_simt_vars what it was (which means simtvars_st is NULL).
If it was non-NULL already, then:

> @@ -4730,6 +4746,31 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
>    if (cfun->gimple_df)
>      pt_solution_reset (&cfun->gimple_df->escaped);
>  
> +  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
> +  if (id->dst_simt_vars)
> +    {

This will be true.

> +      if (id->dst_simt_vars->length () > 0)
> +	{
> +	  size_t nargs = gimple_call_num_args (simtenter_stmt);
> +	  vec<tree> *vars = id->dst_simt_vars;
> +	  auto_vec<tree> newargs (nargs + vars->length ());
> +	  for (size_t i = 0; i < nargs; i++)
> +	    newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
> +	  for (tree *pvar = vars->begin (); pvar != vars->end (); pvar++)
> +	    {
> +	      tree ptrtype = build_pointer_type (TREE_TYPE (*pvar));
> +	      newargs.quick_push (build1 (ADDR_EXPR, ptrtype, *pvar));
> +	    }
> +	  gcall *g
> +	    = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
> +	  gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
> +	  gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
> +	  gsi_replace (&gsi, g, false);
> +	}

And you handle dst_simt_vars from some other invocation.

> +      vec_free (id->dst_simt_vars);
> +      id->dst_simt_vars = simtvars_st;

And then clear it.  That doesn't look like the right thing.

So either you need some bool variable whether you've actually allocated
the vector in the current expand_call_inline and use that instead of
if (id->dst_simt_vars), or maybe you should clear id->dst_simt_vars
otherwise and save/restore it around unconditionally.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-23 16:23           ` Jakub Jelinek
@ 2017-03-23 17:02             ` Alexander Monakov
  2017-03-23 17:09               ` Jakub Jelinek
  0 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-23 17:02 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> And then clear it.  That doesn't look like the right thing.
> 
> So either you need some bool variable whether you've actually allocated
> the vector in the current expand_call_inline and use that instead of
> if (id->dst_simt_vars), or maybe you should clear id->dst_simt_vars
> otherwise and save/restore it around unconditionally.

Yes, thanks for catching this.  I went for the latter approach in the following
patch.

---
 gcc/tree-inline.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 gcc/tree-inline.h |  4 ++++
 2 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 6b6d489..b3bb3d6 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4385,6 +4385,11 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   gcall *call_stmt;
   unsigned int i;
   unsigned int prop_mask, src_properties;
+  struct function *dst_cfun;
+  tree simduid;
+  use_operand_p use;
+  gimple *simtenter_stmt = NULL;
+  vec<tree> *simtvars_save;
 
   /* The gimplifier uses input_location in too many places, such as
      internal_get_tmp_var ().  */
@@ -4588,15 +4593,26 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   id->src_cfun = DECL_STRUCT_FUNCTION (fn);
   id->call_stmt = call_stmt;
 
+  /* When inlining into an OpenMP SIMD-on-SIMT loop, arrange for new automatic
+     variables to be added to IFN_GOMP_SIMT_ENTER argument list.  */
+  dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
+  simtvars_save = id->dst_simt_vars;
+  if (!(dst_cfun->curr_properties & PROP_gimple_lomp_dev)
+      && (simduid = bb->loop_father->simduid) != NULL_TREE
+      && (simduid = ssa_default_def (dst_cfun, simduid)) != NULL_TREE
+      && single_imm_use (simduid, &use, &simtenter_stmt)
+      && is_gimple_call (simtenter_stmt)
+      && gimple_call_internal_p (simtenter_stmt, IFN_GOMP_SIMT_ENTER))
+    vec_alloc (id->dst_simt_vars, 0);
+  else
+    id->dst_simt_vars = NULL;
+
   /* If the src function contains an IFN_VA_ARG, then so will the dst
      function after inlining.  Likewise for IFN_GOMP_USE_SIMT.  */
   prop_mask = PROP_gimple_lva | PROP_gimple_lomp_dev;
   src_properties = id->src_cfun->curr_properties & prop_mask;
   if (src_properties != prop_mask)
-    {
-      struct function *dst_cfun = DECL_STRUCT_FUNCTION (id->dst_fn);
-      dst_cfun->curr_properties &= src_properties | ~prop_mask;
-    }
+    dst_cfun->curr_properties &= src_properties | ~prop_mask;
 
   gcc_assert (!id->src_cfun->after_inlining);
 
@@ -4730,6 +4746,27 @@ expand_call_inline (basic_block bb, gimple *stmt, copy_body_data *id)
   if (cfun->gimple_df)
     pt_solution_reset (&cfun->gimple_df->escaped);
 
+  /* Add new automatic variables to IFN_GOMP_SIMT_ENTER arguments.  */
+  if (id->dst_simt_vars && id->dst_simt_vars->length () > 0)
+    {
+      size_t nargs = gimple_call_num_args (simtenter_stmt);
+      vec<tree> *vars = id->dst_simt_vars;
+      auto_vec<tree> newargs (nargs + vars->length ());
+      for (size_t i = 0; i < nargs; i++)
+	newargs.quick_push (gimple_call_arg (simtenter_stmt, i));
+      for (tree *pvar = vars->begin (); pvar != vars->end (); pvar++)
+	{
+	  tree ptrtype = build_pointer_type (TREE_TYPE (*pvar));
+	  newargs.quick_push (build1 (ADDR_EXPR, ptrtype, *pvar));
+	}
+      gcall *g = gimple_build_call_internal_vec (IFN_GOMP_SIMT_ENTER, newargs);
+      gimple_call_set_lhs (g, gimple_call_lhs (simtenter_stmt));
+      gimple_stmt_iterator gsi = gsi_for_stmt (simtenter_stmt);
+      gsi_replace (&gsi, g, false);
+    }
+  vec_free (id->dst_simt_vars);
+  id->dst_simt_vars = simtvars_save;
+
   /* Clean up.  */
   if (id->debug_map)
     {
@@ -5453,9 +5490,19 @@ copy_decl_for_dup_finish (copy_body_data *id, tree decl, tree copy)
        function.  */
     ;
   else
-    /* Ordinary automatic local variables are now in the scope of the
-       new function.  */
-    DECL_CONTEXT (copy) = id->dst_fn;
+    {
+      /* Ordinary automatic local variables are now in the scope of the
+	 new function.  */
+      DECL_CONTEXT (copy) = id->dst_fn;
+      if (VAR_P (copy) && id->dst_simt_vars && !is_gimple_reg (copy))
+	{
+	  if (!lookup_attribute ("omp simt private", DECL_ATTRIBUTES (copy)))
+	    DECL_ATTRIBUTES (copy)
+	      = tree_cons (get_identifier ("omp simt private"), NULL,
+			   DECL_ATTRIBUTES (copy));
+	  id->dst_simt_vars->safe_push (copy);
+	}
+    }
 
   return copy;
 }
diff --git a/gcc/tree-inline.h b/gcc/tree-inline.h
index 88b3286..ffb8333 100644
--- a/gcc/tree-inline.h
+++ b/gcc/tree-inline.h
@@ -145,6 +145,10 @@ struct copy_body_data
      equivalents in the function into which it is being inlined.  */
   hash_map<dependence_hash, unsigned short> *dependence_map;
 
+  /* A list of addressable local variables remapped into the caller
+     when inlining a call within an OpenMP SIMD-on-SIMT loop.  */
+  vec<tree> *dst_simt_vars;
+
   /* Cilk keywords currently need to replace some variables that
      ordinary nested functions do not.  */
   bool remap_var_for_cilk;
-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 4/5] tree-inline: implement SIMT privatization, part 3
  2017-03-23 17:02             ` Alexander Monakov
@ 2017-03-23 17:09               ` Jakub Jelinek
  0 siblings, 0 replies; 27+ messages in thread
From: Jakub Jelinek @ 2017-03-23 17:09 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Thu, Mar 23, 2017 at 08:00:11PM +0300, Alexander Monakov wrote:
> On Thu, 23 Mar 2017, Jakub Jelinek wrote:
> > And then clear it.  That doesn't look like the right thing.
> > 
> > So either you need some bool variable whether you've actually allocated
> > the vector in the current expand_call_inline and use that instead of
> > if (id->dst_simt_vars), or maybe you should clear id->dst_simt_vars
> > otherwise and save/restore it around unconditionally.
> 
> Yes, thanks for catching this.  I went for the latter approach in the following
> patch.

Ok for trunk, thanks.

For the nvptx bits, I think you need to ask Bernd to review it.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/5] nvptx: implement SIMT enter/exit insns
  2017-03-22 15:46 ` [PATCH 1/5] nvptx: implement SIMT enter/exit insns Alexander Monakov
@ 2017-03-27 11:12   ` Alexander Monakov
  2017-03-27 15:03     ` Bernd Schmidt
  0 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-03-27 11:12 UTC (permalink / raw)
  To: gcc-patches; +Cc: Bernd Schmidt

Hello Bernd,

Can you have a look at this patch (unchanged from previous posting in January)?
The rest of the patches in the set are reviewed.

On Wed, 22 Mar 2017, Alexander Monakov wrote:

> This patch adds handling of new omp_simt_enter/omp_simt_exit named insns
> in the NVPTX backend.
> 
> 	* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
>         (nvptx_output_simt_exit): Declare.
>         * config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
>         cfun->machine->unisimt_location.  Handle NULL unisimt_predicate.
>         (init_softstack_frame): Move initialization of crtl->is_leaf to...
>         (nvptx_declare_function_name): ...here.  Emit declaration of local
>         memory space buffer for omp_simt_enter insn.
>         (nvptx_output_unisimt_switch): New.
>         (nvptx_output_softstack_switch): New.
>         (nvptx_output_simt_enter): New.
>         (nvptx_output_simt_exit): New.
>         * config/nvptx/nvptx.h (struct machine_function): New fields
>         has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
>         * config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
>         (UNSPECV_SIMT_EXIT): Ditto.
>         (omp_simt_enter_insn): New insn.
>         (omp_simt_enter): New expansion.
>         (omp_simt_exit): New insn.
>         * config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 1/5] nvptx: implement SIMT enter/exit insns
  2017-03-27 11:12   ` Alexander Monakov
@ 2017-03-27 15:03     ` Bernd Schmidt
  0 siblings, 0 replies; 27+ messages in thread
From: Bernd Schmidt @ 2017-03-27 15:03 UTC (permalink / raw)
  To: Alexander Monakov, gcc-patches

On 03/27/2017 12:56 PM, Alexander Monakov wrote:
> Hello Bernd,
>
> Can you have a look at this patch (unchanged from previous posting in January)?
> The rest of the patches in the set are reviewed.
>
> On Wed, 22 Mar 2017, Alexander Monakov wrote:
>
>> This patch adds handling of new omp_simt_enter/omp_simt_exit named insns
>> in the NVPTX backend.
>>
>> 	* config/nvptx/nvptx-protos.h (nvptx_output_simt_enter): Declare.
>>         (nvptx_output_simt_exit): Declare.
>>         * config/nvptx/nvptx.c (nvptx_init_unisimt_predicate): Use
>>         cfun->machine->unisimt_location.  Handle NULL unisimt_predicate.
>>         (init_softstack_frame): Move initialization of crtl->is_leaf to...
>>         (nvptx_declare_function_name): ...here.  Emit declaration of local
>>         memory space buffer for omp_simt_enter insn.
>>         (nvptx_output_unisimt_switch): New.
>>         (nvptx_output_softstack_switch): New.
>>         (nvptx_output_simt_enter): New.
>>         (nvptx_output_simt_exit): New.
>>         * config/nvptx/nvptx.h (struct machine_function): New fields
>>         has_simtreg, unisimt_location, simt_stack_size, simt_stack_align.
>>         * config/nvptx/nvptx.md (UNSPECV_SIMT_ENTER): New unspec.
>>         (UNSPECV_SIMT_EXIT): Ditto.
>>         (omp_simt_enter_insn): New insn.
>>         (omp_simt_enter): New expansion.
>>         (omp_simt_exit): New insn.
>>         * config/nvptx/nvptx.opt (msoft-stack-reserve-local): New option.

Technically this whole series isn't a regression fix, but since Jakub 
has acked the rest, this is OK too.


Bernd

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions
  2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
                   ` (4 preceding siblings ...)
  2017-03-22 15:46 ` [PATCH 4/5] tree-inline: implement SIMT privatization, part 3 Alexander Monakov
@ 2017-03-31 10:22 ` Thomas Schwinge
  5 siblings, 0 replies; 27+ messages in thread
From: Thomas Schwinge @ 2017-03-31 10:22 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

Hi!

On Wed, 22 Mar 2017 18:46:30 +0300, Alexander Monakov <amonakov@ispras.ru> wrote:
> This patchset implements privatization of addressable variables in OpenMP SIMD
> regions lowered for SIMT targets (i.e. NVPTX) via the approach identified in
> the review of the previous submission.  [...]

Given that the subject describes this to "improve correctness in SIMD
regions": no test cases for this one?


I observe a few more instances of
"[...]/gcc/config/nvptx/nvptx.md:[...]:1: warning: operand [...]  missing
mode?".  Maybe something to look at/resolve, at some point.


And:

    [...]/gcc/config/nvptx/nvptx.c: In function 'void nvptx_output_softstack_switch(FILE*, bool, rtx, rtx, rtx)':
    [...]/gcc/config/nvptx/nvptx.c:1357:39: warning: format '%d' expects argument of type 'int', but argument 6 has type 'long unsigned int' [-Wformat]


Grüße
 Thomas

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/5] omp-low: implement SIMT privatization, part 1
  2017-03-23 10:32   ` Jakub Jelinek
@ 2017-03-31 16:05     ` Alexander Monakov
  2017-04-07  7:58       ` [PATCH] omp-low: fix lastprivate/linear lowering for SIMT Alexander Monakov
  2017-04-20 15:32       ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Jakub Jelinek
  0 siblings, 2 replies; 27+ messages in thread
From: Alexander Monakov @ 2017-03-31 16:05 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Hello Jakub,

I've noticed while re-reading that this patch incorrectly handled SIMT case
in lower_lastprivate_clauses.  The code was changed to look for variables
with "omp simt private" attribute, and was left under
'simduid && DECL_HAS_VALUE_EXPR_P (new_var)' condition.  This effectively
constrained processing to privatized (i.e. addressable) variables, as
non-addressable variables now have neither the value-expr nor the attribute.

This wasn't caught in testing, as apparently all testcases that have target
simd loops with linear/lastprivate clauses also have the corresponding variables
mentioned in target map clause, which makes them addressable (is that necessary?),
and I didn't think to check something like that manually.

The following patch fixes it by adjusting the if's in lower_lastprivate_clauses;
alternatively it may be possible to keep that code as-is, and instead set up
value-expr and "omp simt private" attributes for all formally private variables,
not just addressable ones, but to me that sounds less preferable.  OK for trunk?

I think it's possible to improve test coverage for NVPTX by running all OpenMP
testcases via nvptx-none-run (it'll need some changes, but shouldn't be hard).

gcc/
	* omp-low.c (lower_lastprivate_clauses): Correct handling of linear and
	lastprivate clauses in SIMT case.

libgomp/
	* testsuite/libgomp.c/target-36.c: New testcase.

Thanks.
Alexander
	
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 253dc85..02b681c 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -4768,11 +4768,10 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 		TREE_NO_WARNING (new_var) = 1;
 	    }
 
-	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
+	  if (!maybe_simt && simduid && DECL_HAS_VALUE_EXPR_P (new_var))
 	    {
 	      tree val = DECL_VALUE_EXPR (new_var);
-	      if (!maybe_simt
-		  && TREE_CODE (val) == ARRAY_REF
+	      if (TREE_CODE (val) == ARRAY_REF
 		  && VAR_P (TREE_OPERAND (val, 0))
 		  && lookup_attribute ("omp simd array",
 				       DECL_ATTRIBUTES (TREE_OPERAND (val,
@@ -4792,26 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
 				    TREE_OPERAND (val, 0), lastlane,
 				    NULL_TREE, NULL_TREE);
 		}
-	      else if (maybe_simt
-		       && VAR_P (val)
-		       && lookup_attribute ("omp simt private",
-					    DECL_ATTRIBUTES (val)))
+	    }
+	  else if (maybe_simt)
+	    {
+	      tree val = (DECL_HAS_VALUE_EXPR_P (new_var)
+			  ? DECL_VALUE_EXPR (new_var)
+			  : new_var);
+	      if (simtlast == NULL)
 		{
-		  if (simtlast == NULL)
-		    {
-		      simtlast = create_tmp_var (unsigned_type_node);
-		      gcall *g = gimple_build_call_internal
-			(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
-		      gimple_call_set_lhs (g, simtlast);
-		      gimple_seq_add_stmt (stmt_list, g);
-		    }
-		  x = build_call_expr_internal_loc
-		    (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
-		     TREE_TYPE (val), 2, val, simtlast);
-		  new_var = unshare_expr (new_var);
-		  gimplify_assign (new_var, x, stmt_list);
-		  new_var = unshare_expr (new_var);
+		  simtlast = create_tmp_var (unsigned_type_node);
+		  gcall *g = gimple_build_call_internal
+		    (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
+		  gimple_call_set_lhs (g, simtlast);
+		  gimple_seq_add_stmt (stmt_list, g);
 		}
+	      x = build_call_expr_internal_loc
+		(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
+		 TREE_TYPE (val), 2, val, simtlast);
+	      new_var = unshare_expr (new_var);
+	      gimplify_assign (new_var, x, stmt_list);
+	      new_var = unshare_expr (new_var);
 	    }
 
 	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
new file mode 100644
index 0000000..6925a2a
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/target-36.c
@@ -0,0 +1,18 @@
+int
+main ()
+{
+  int ah, bh, n = 1024;
+#pragma omp target map(from: ah, bh)
+  {
+    int a, b;
+#pragma omp simd lastprivate(b)
+    for (a = 0; a < n; a++)
+      {
+	b = a + n + 1;
+	asm volatile ("" : "+r"(b));
+      }
+    ah = a, bh = b;
+  }
+  if (ah != n || bh != 2 * n)
+    __builtin_abort ();
+}

^ permalink raw reply	[flat|nested] 27+ messages in thread

* [PATCH] omp-low: fix lastprivate/linear lowering for SIMT
  2017-03-31 16:05     ` Alexander Monakov
@ 2017-04-07  7:58       ` Alexander Monakov
  2017-04-20 12:55         ` Alexander Monakov
  2017-04-20 15:32       ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Jakub Jelinek
  1 sibling, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-04-07  7:58 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Ping.

> I've noticed while re-reading that this patch incorrectly handled SIMT case
> in lower_lastprivate_clauses.  The code was changed to look for variables
> with "omp simt private" attribute, and was left under
> 'simduid && DECL_HAS_VALUE_EXPR_P (new_var)' condition.  This effectively
> constrained processing to privatized (i.e. addressable) variables, as
> non-addressable variables now have neither the value-expr nor the attribute.
> 
> This wasn't caught in testing, as apparently all testcases that have target
> simd loops with linear/lastprivate clauses also have the corresponding variables
> mentioned in target map clause, which makes them addressable (is that necessary?),
> and I didn't think to check something like that manually.
> 
> The following patch fixes it by adjusting the if's in lower_lastprivate_clauses;
> alternatively it may be possible to keep that code as-is, and instead set up
> value-expr and "omp simt private" attributes for all formally private variables,
> not just addressable ones, but to me that sounds less preferable.  OK for trunk?
> 
> I think it's possible to improve test coverage for NVPTX by running all OpenMP
> testcases via nvptx-none-run (it'll need some changes, but shouldn't be hard).
> 
> gcc/
> 	* omp-low.c (lower_lastprivate_clauses): Correct handling of linear and
> 	lastprivate clauses in SIMT case.
> 
> libgomp/
> 	* testsuite/libgomp.c/target-36.c: New testcase.
> 
> Thanks.
> Alexander
> 	
> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> index 253dc85..02b681c 100644
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -4768,11 +4768,10 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
>  		TREE_NO_WARNING (new_var) = 1;
>  	    }
>  
> -	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
> +	  if (!maybe_simt && simduid && DECL_HAS_VALUE_EXPR_P (new_var))
>  	    {
>  	      tree val = DECL_VALUE_EXPR (new_var);
> -	      if (!maybe_simt
> -		  && TREE_CODE (val) == ARRAY_REF
> +	      if (TREE_CODE (val) == ARRAY_REF
>  		  && VAR_P (TREE_OPERAND (val, 0))
>  		  && lookup_attribute ("omp simd array",
>  				       DECL_ATTRIBUTES (TREE_OPERAND (val,
> @@ -4792,26 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
>  				    TREE_OPERAND (val, 0), lastlane,
>  				    NULL_TREE, NULL_TREE);
>  		}
> -	      else if (maybe_simt
> -		       && VAR_P (val)
> -		       && lookup_attribute ("omp simt private",
> -					    DECL_ATTRIBUTES (val)))
> +	    }
> +	  else if (maybe_simt)
> +	    {
> +	      tree val = (DECL_HAS_VALUE_EXPR_P (new_var)
> +			  ? DECL_VALUE_EXPR (new_var)
> +			  : new_var);
> +	      if (simtlast == NULL)
>  		{
> -		  if (simtlast == NULL)
> -		    {
> -		      simtlast = create_tmp_var (unsigned_type_node);
> -		      gcall *g = gimple_build_call_internal
> -			(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
> -		      gimple_call_set_lhs (g, simtlast);
> -		      gimple_seq_add_stmt (stmt_list, g);
> -		    }
> -		  x = build_call_expr_internal_loc
> -		    (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
> -		     TREE_TYPE (val), 2, val, simtlast);
> -		  new_var = unshare_expr (new_var);
> -		  gimplify_assign (new_var, x, stmt_list);
> -		  new_var = unshare_expr (new_var);
> +		  simtlast = create_tmp_var (unsigned_type_node);
> +		  gcall *g = gimple_build_call_internal
> +		    (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
> +		  gimple_call_set_lhs (g, simtlast);
> +		  gimple_seq_add_stmt (stmt_list, g);
>  		}
> +	      x = build_call_expr_internal_loc
> +		(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
> +		 TREE_TYPE (val), 2, val, simtlast);
> +	      new_var = unshare_expr (new_var);
> +	      gimplify_assign (new_var, x, stmt_list);
> +	      new_var = unshare_expr (new_var);
>  	    }
>  
>  	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
> diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
> new file mode 100644
> index 0000000..6925a2a
> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-36.c
> @@ -0,0 +1,18 @@
> +int
> +main ()
> +{
> +  int ah, bh, n = 1024;
> +#pragma omp target map(from: ah, bh)
> +  {
> +    int a, b;
> +#pragma omp simd lastprivate(b)
> +    for (a = 0; a < n; a++)
> +      {
> +	b = a + n + 1;
> +	asm volatile ("" : "+r"(b));
> +      }
> +    ah = a, bh = b;
> +  }
> +  if (ah != n || bh != 2 * n)
> +    __builtin_abort ();
> +}
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH] omp-low: fix lastprivate/linear lowering for SIMT
  2017-04-07  7:58       ` [PATCH] omp-low: fix lastprivate/linear lowering for SIMT Alexander Monakov
@ 2017-04-20 12:55         ` Alexander Monakov
  0 siblings, 0 replies; 27+ messages in thread
From: Alexander Monakov @ 2017-04-20 12:55 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

Ping - as this patch addresses a wrong-code issue in new functionality, I'd like
to ask if it may be applied to gcc-7 branch too.

On Fri, 7 Apr 2017, Alexander Monakov wrote:

> Ping.
> 
> > I've noticed while re-reading that this patch incorrectly handled SIMT case
> > in lower_lastprivate_clauses.  The code was changed to look for variables
> > with "omp simt private" attribute, and was left under
> > 'simduid && DECL_HAS_VALUE_EXPR_P (new_var)' condition.  This effectively
> > constrained processing to privatized (i.e. addressable) variables, as
> > non-addressable variables now have neither the value-expr nor the attribute.
> > 
> > This wasn't caught in testing, as apparently all testcases that have target
> > simd loops with linear/lastprivate clauses also have the corresponding variables
> > mentioned in target map clause, which makes them addressable (is that necessary?),
> > and I didn't think to check something like that manually.
> > 
> > The following patch fixes it by adjusting the if's in lower_lastprivate_clauses;
> > alternatively it may be possible to keep that code as-is, and instead set up
> > value-expr and "omp simt private" attributes for all formally private variables,
> > not just addressable ones, but to me that sounds less preferable.  OK for trunk?
> > 
> > I think it's possible to improve test coverage for NVPTX by running all OpenMP
> > testcases via nvptx-none-run (it'll need some changes, but shouldn't be hard).
> > 
> > gcc/
> > 	* omp-low.c (lower_lastprivate_clauses): Correct handling of linear and
> > 	lastprivate clauses in SIMT case.
> > 
> > libgomp/
> > 	* testsuite/libgomp.c/target-36.c: New testcase.
> > 
> > Thanks.
> > Alexander
> > 	
> > diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> > index 253dc85..02b681c 100644
> > --- a/gcc/omp-low.c
> > +++ b/gcc/omp-low.c
> > @@ -4768,11 +4768,10 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
> >  		TREE_NO_WARNING (new_var) = 1;
> >  	    }
> >  
> > -	  if (simduid && DECL_HAS_VALUE_EXPR_P (new_var))
> > +	  if (!maybe_simt && simduid && DECL_HAS_VALUE_EXPR_P (new_var))
> >  	    {
> >  	      tree val = DECL_VALUE_EXPR (new_var);
> > -	      if (!maybe_simt
> > -		  && TREE_CODE (val) == ARRAY_REF
> > +	      if (TREE_CODE (val) == ARRAY_REF
> >  		  && VAR_P (TREE_OPERAND (val, 0))
> >  		  && lookup_attribute ("omp simd array",
> >  				       DECL_ATTRIBUTES (TREE_OPERAND (val,
> > @@ -4792,26 +4791,26 @@ lower_lastprivate_clauses (tree clauses, tree predicate, gimple_seq *stmt_list,
> >  				    TREE_OPERAND (val, 0), lastlane,
> >  				    NULL_TREE, NULL_TREE);
> >  		}
> > -	      else if (maybe_simt
> > -		       && VAR_P (val)
> > -		       && lookup_attribute ("omp simt private",
> > -					    DECL_ATTRIBUTES (val)))
> > +	    }
> > +	  else if (maybe_simt)
> > +	    {
> > +	      tree val = (DECL_HAS_VALUE_EXPR_P (new_var)
> > +			  ? DECL_VALUE_EXPR (new_var)
> > +			  : new_var);
> > +	      if (simtlast == NULL)
> >  		{
> > -		  if (simtlast == NULL)
> > -		    {
> > -		      simtlast = create_tmp_var (unsigned_type_node);
> > -		      gcall *g = gimple_build_call_internal
> > -			(IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
> > -		      gimple_call_set_lhs (g, simtlast);
> > -		      gimple_seq_add_stmt (stmt_list, g);
> > -		    }
> > -		  x = build_call_expr_internal_loc
> > -		    (UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
> > -		     TREE_TYPE (val), 2, val, simtlast);
> > -		  new_var = unshare_expr (new_var);
> > -		  gimplify_assign (new_var, x, stmt_list);
> > -		  new_var = unshare_expr (new_var);
> > +		  simtlast = create_tmp_var (unsigned_type_node);
> > +		  gcall *g = gimple_build_call_internal
> > +		    (IFN_GOMP_SIMT_LAST_LANE, 1, simtcond);
> > +		  gimple_call_set_lhs (g, simtlast);
> > +		  gimple_seq_add_stmt (stmt_list, g);
> >  		}
> > +	      x = build_call_expr_internal_loc
> > +		(UNKNOWN_LOCATION, IFN_GOMP_SIMT_XCHG_IDX,
> > +		 TREE_TYPE (val), 2, val, simtlast);
> > +	      new_var = unshare_expr (new_var);
> > +	      gimplify_assign (new_var, x, stmt_list);
> > +	      new_var = unshare_expr (new_var);
> >  	    }
> >  
> >  	  if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
> > diff --git a/libgomp/testsuite/libgomp.c/target-36.c b/libgomp/testsuite/libgomp.c/target-36.c
> > new file mode 100644
> > index 0000000..6925a2a
> > --- /dev/null
> > +++ b/libgomp/testsuite/libgomp.c/target-36.c
> > @@ -0,0 +1,18 @@
> > +int
> > +main ()
> > +{
> > +  int ah, bh, n = 1024;
> > +#pragma omp target map(from: ah, bh)
> > +  {
> > +    int a, b;
> > +#pragma omp simd lastprivate(b)
> > +    for (a = 0; a < n; a++)
> > +      {
> > +	b = a + n + 1;
> > +	asm volatile ("" : "+r"(b));
> > +      }
> > +    ah = a, bh = b;
> > +  }
> > +  if (ah != n || bh != 2 * n)
> > +    __builtin_abort ();
> > +}
> > 
> 

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/5] omp-low: implement SIMT privatization, part 1
  2017-03-31 16:05     ` Alexander Monakov
  2017-04-07  7:58       ` [PATCH] omp-low: fix lastprivate/linear lowering for SIMT Alexander Monakov
@ 2017-04-20 15:32       ` Jakub Jelinek
  2017-04-20 16:48         ` Alexander Monakov
  1 sibling, 1 reply; 27+ messages in thread
From: Jakub Jelinek @ 2017-04-20 15:32 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Fri, Mar 31, 2017 at 06:38:09PM +0300, Alexander Monakov wrote:
> I've noticed while re-reading that this patch incorrectly handled SIMT case
> in lower_lastprivate_clauses.  The code was changed to look for variables
> with "omp simt private" attribute, and was left under
> 'simduid && DECL_HAS_VALUE_EXPR_P (new_var)' condition.  This effectively
> constrained processing to privatized (i.e. addressable) variables, as
> non-addressable variables now have neither the value-expr nor the attribute.

Sorry for the review delay.

> This wasn't caught in testing, as apparently all testcases that have target
> simd loops with linear/lastprivate clauses also have the corresponding variables
> mentioned in target map clause, which makes them addressable (is that necessary?),

Yes, in order to map something you need to map its address (+ size) on the
host to its address on the device, so it needs to be addressable.
Compared to that, firstprivate on target should not make it addressable.

> and I didn't think to check something like that manually.
> 
> The following patch fixes it by adjusting the if's in lower_lastprivate_clauses;
> alternatively it may be possible to keep that code as-is, and instead set up
> value-expr and "omp simt private" attributes for all formally private variables,
> not just addressable ones, but to me that sounds less preferable.  OK for trunk?
> 
> I think it's possible to improve test coverage for NVPTX by running all OpenMP
> testcases via nvptx-none-run (it'll need some changes, but shouldn't be hard).
> 
> gcc/
> 	* omp-low.c (lower_lastprivate_clauses): Correct handling of linear and
> 	lastprivate clauses in SIMT case.
> 
> libgomp/
> 	* testsuite/libgomp.c/target-36.c: New testcase.

Ok for trunk/gcc-7-branch, thanks.

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.c/target-36.c
> @@ -0,0 +1,18 @@
> +int
> +main ()
> +{
> +  int ah, bh, n = 1024;
> +#pragma omp target map(from: ah, bh)
> +  {
> +    int a, b;
> +#pragma omp simd lastprivate(b)
> +    for (a = 0; a < n; a++)
> +      {
> +	b = a + n + 1;
> +	asm volatile ("" : "+r"(b));
> +      }
> +    ah = a, bh = b;
> +  }
> +  if (ah != n || bh != 2 * n)
> +    __builtin_abort ();
> +}

Would be nice to also test explicit linear, perhaps in the same testcase,
just add ch and c and use say linear(c:2).

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/5] omp-low: implement SIMT privatization, part 1
  2017-04-20 15:32       ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Jakub Jelinek
@ 2017-04-20 16:48         ` Alexander Monakov
  2017-04-20 17:29           ` Jakub Jelinek
  0 siblings, 1 reply; 27+ messages in thread
From: Alexander Monakov @ 2017-04-20 16:48 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Thu, 20 Apr 2017, Jakub Jelinek wrote:
> > This wasn't caught in testing, as apparently all testcases that have target
> > simd loops with linear/lastprivate clauses also have the corresponding variables
> > mentioned in target map clause, which makes them addressable (is that necessary?),
> 
> Yes, in order to map something you need to map its address (+ size) on the
> host to its address on the device, so it needs to be addressable.
> Compared to that, firstprivate on target should not make it addressable.

But ideally if nothing else is taking the address of a mapped variable inside
of a target region, then it'd be more efficient to create a non-addressable
instance and just copy its value from/to the addressable one on target
region entry/exit.

> Would be nice to also test explicit linear, perhaps in the same testcase,
> just add ch and c and use say linear(c:2).

Unfortunately that uncovers a separate wrong-code issue, explicit linear is
not specifically handled in simt regions, but it should be, since we change
the loop iteration step.

Thanks.
Alexander

^ permalink raw reply	[flat|nested] 27+ messages in thread

* Re: [PATCH 2/5] omp-low: implement SIMT privatization, part 1
  2017-04-20 16:48         ` Alexander Monakov
@ 2017-04-20 17:29           ` Jakub Jelinek
  0 siblings, 0 replies; 27+ messages in thread
From: Jakub Jelinek @ 2017-04-20 17:29 UTC (permalink / raw)
  To: Alexander Monakov; +Cc: gcc-patches

On Thu, Apr 20, 2017 at 07:37:13PM +0300, Alexander Monakov wrote:
> On Thu, 20 Apr 2017, Jakub Jelinek wrote:
> > > This wasn't caught in testing, as apparently all testcases that have target
> > > simd loops with linear/lastprivate clauses also have the corresponding variables
> > > mentioned in target map clause, which makes them addressable (is that necessary?),
> > 
> > Yes, in order to map something you need to map its address (+ size) on the
> > host to its address on the device, so it needs to be addressable.
> > Compared to that, firstprivate on target should not make it addressable.
> 
> But ideally if nothing else is taking the address of a mapped variable inside
> of a target region, then it'd be more efficient to create a non-addressable
> instance and just copy its value from/to the addressable one on target
> region entry/exit.

Perhaps, but you'd need to do only if it is map on the target construct
because that is the only case where you can actually add copy in/out code
on the host as well as target.  And you'd need to think about nowait
implications etc., or what happens if it in addition to target construct
is mentioned in map clause on some other construct etc., i.e. take into
account all the clauses of the variable in the scope of the variable, not
just a single one.  That is GCC8 material for sure.  If it is say map(to:),
it could as well be just promotion into firstprivate, or for map(alloc:) to
private etc.

> > Would be nice to also test explicit linear, perhaps in the same testcase,
> > just add ch and c and use say linear(c:2).
> 
> Unfortunately that uncovers a separate wrong-code issue, explicit linear is
> not specifically handled in simt regions, but it should be, since we change
> the loop iteration step.

Then please commit what you have now and deal with the rest incrementally.

	Jakub

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2017-04-20 16:48 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-22 15:46 [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Alexander Monakov
2017-03-22 15:46 ` [PATCH 1/5] nvptx: implement SIMT enter/exit insns Alexander Monakov
2017-03-27 11:12   ` Alexander Monakov
2017-03-27 15:03     ` Bernd Schmidt
2017-03-22 15:46 ` [PATCH 5/5] address-taken: optimize SIMT privatized variables Alexander Monakov
2017-03-23 10:48   ` Jakub Jelinek
2017-03-22 15:46 ` [PATCH 3/5] omp-offload: implement SIMT privatization, part 2 Alexander Monakov
2017-03-23 10:37   ` Jakub Jelinek
2017-03-23 10:53     ` Alexander Monakov
2017-03-23 11:19       ` Jakub Jelinek
2017-03-22 15:46 ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Alexander Monakov
2017-03-23 10:32   ` Jakub Jelinek
2017-03-31 16:05     ` Alexander Monakov
2017-04-07  7:58       ` [PATCH] omp-low: fix lastprivate/linear lowering for SIMT Alexander Monakov
2017-04-20 12:55         ` Alexander Monakov
2017-04-20 15:32       ` [PATCH 2/5] omp-low: implement SIMT privatization, part 1 Jakub Jelinek
2017-04-20 16:48         ` Alexander Monakov
2017-04-20 17:29           ` Jakub Jelinek
2017-03-22 15:46 ` [PATCH 4/5] tree-inline: implement SIMT privatization, part 3 Alexander Monakov
2017-03-23 10:47   ` Jakub Jelinek
2017-03-23 11:13     ` Alexander Monakov
2017-03-23 11:25       ` Jakub Jelinek
2017-03-23 16:15         ` Alexander Monakov
2017-03-23 16:23           ` Jakub Jelinek
2017-03-23 17:02             ` Alexander Monakov
2017-03-23 17:09               ` Jakub Jelinek
2017-03-31 10:22 ` [PATCHv2 0/5] OpenMP/PTX: improve correctness in SIMD regions Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).