public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [gomp4] routine calls
@ 2015-08-20 17:39 Nathan Sidwell
  0 siblings, 0 replies; only message in thread
From: Nathan Sidwell @ 2015-08-20 17:39 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 372 bytes --]

I've committed this to the gomp4 branch.  It augments the call RTL with an
optional const int, indicating the partitioning requirements of the target
function.  This is set from the target function's 'oacc function' attribute.
We don't do anything with this information yet -- it'll be needed to get the
correct number of threads to execute the call instruction.

nathan

[-- Attachment #2: gomp4-routine-call.patch --]
[-- Type: text/x-patch, Size: 9158 bytes --]

2015-08-20  Nathan Sidwell  <nathan@codesourcery.com>

	* omp-low.c (build_oacc_routine_dims): Expand comment.
	* config/nvptx/nvptx.md (call_operation): Skip optional
	partitioning information.
	* config/nvptx/nvptx.c (nvptx_expand_call): Insert target
	partitioning information, if present.
	(nvptx_output_call_insn): Skip partitioning info, if present.

Index: gcc/config/nvptx/nvptx.c
===================================================================
--- gcc/config/nvptx/nvptx.c	(revision 226981)
+++ gcc/config/nvptx/nvptx.c	(working copy)
@@ -848,19 +848,18 @@ nvptx_end_call_args (void)
 void
 nvptx_expand_call (rtx retval, rtx address)
 {
-  int nargs;
+  int nargs = 0;
   rtx callee = XEXP (address, 0);
   rtx pat, t;
   rtvec vec;
   bool external_decl = false;
+  rtx partitioning = NULL_RTX;
+  rtx varargs = NULL_RTX;
+  tree decl_type = NULL_TREE;
 
-  nargs = 0;
   for (t = cfun->machine->call_args; t; t = XEXP (t, 1))
     nargs++;
 
-  bool has_varargs = false;
-  tree decl_type = NULL_TREE;
-
   if (!call_insn_operand (callee, Pmode))
     {
       callee = force_reg (Pmode, callee);
@@ -877,6 +876,22 @@ nvptx_expand_call (rtx retval, rtx addre
 	    cfun->machine->has_call_with_sc = true;
 	  if (DECL_EXTERNAL (decl))
 	    external_decl = true;
+	  tree attr = get_oacc_fn_attrib (decl);
+	  if (attr)
+	    {
+	      tree dims = TREE_VALUE (attr);
+
+	      for (int ix = 0; ix != GOMP_DIM_MAX; ix++)
+		{
+		  if (TREE_PURPOSE (dims)
+		      && !integer_zerop (TREE_PURPOSE (dims)))
+		    {
+		      partitioning = GEN_INT (ix);
+		      break;
+		    }
+		  dims = TREE_CHAIN (dims);
+		}
+	    }
 	}
     }
   if (cfun->machine->funtype
@@ -887,31 +902,19 @@ nvptx_expand_call (rtx retval, rtx addre
 	  || TREE_CODE (cfun->machine->funtype) == METHOD_TYPE)
       && stdarg_p (cfun->machine->funtype))
     {
-      has_varargs = true;
-      cfun->machine->has_call_with_varargs = true;
-    }
-  vec = rtvec_alloc (nargs + 1 + (has_varargs ? 1 : 0));
-  pat = gen_rtx_PARALLEL (VOIDmode, vec);
-  if (has_varargs)
-    {
-      rtx this_arg = gen_reg_rtx (Pmode);
+      varargs = gen_reg_rtx (Pmode);
       if (Pmode == DImode)
-	emit_move_insn (this_arg, stack_pointer_rtx);
+	emit_move_insn (varargs, stack_pointer_rtx);
       else
-	emit_move_insn (this_arg, stack_pointer_rtx);
-      XVECEXP (pat, 0, nargs + 1) = gen_rtx_USE (VOIDmode, this_arg);
-    }
-
-  /* Construct the call insn, including a USE for each argument pseudo
-     register.  These will be used when printing the insn.  */
-  int i;
-  rtx arg;
-  for (i = 1, arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1), i++)
-    {
-      rtx this_arg = XEXP (arg, 0);
-      XVECEXP (pat, 0, i) = gen_rtx_USE (VOIDmode, this_arg);
+	emit_move_insn (varargs, stack_pointer_rtx);
+      cfun->machine->has_call_with_varargs = true;
     }
+  vec = rtvec_alloc (nargs + 1
+		     + (partitioning ? 1 : 0) + (varargs ? 1 : 0));
+  pat = gen_rtx_PARALLEL (VOIDmode, vec);
 
+  int vec_pos = 0;
+  
   rtx tmp_retval = retval;
   t = gen_rtx_CALL (VOIDmode, address, const0_rtx);
   if (retval != NULL_RTX)
@@ -920,7 +923,23 @@ nvptx_expand_call (rtx retval, rtx addre
 	tmp_retval = gen_reg_rtx (GET_MODE (retval));
       t = gen_rtx_SET (tmp_retval, t);
     }
-  XVECEXP (pat, 0, 0) = t;
+  XVECEXP (pat, 0, vec_pos++) = t;
+
+  if (partitioning)
+    XVECEXP (pat, 0, vec_pos++) = partitioning;
+
+  /* Construct the call insn, including a USE for each argument pseudo
+     register.  These will be used when printing the insn.  */
+  for (rtx arg = cfun->machine->call_args; arg; arg = XEXP (arg, 1))
+    {
+      rtx this_arg = XEXP (arg, 0);
+      XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, this_arg);
+    }
+
+  if (varargs)
+    XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, varargs);
+
+  gcc_assert (vec_pos == XVECLEN (pat, 0));
 
   /* If this is a libcall, decl_type is NULL. For a call to a non-libcall
      undeclared function, we'll have an external decl without arg types.
@@ -1816,17 +1835,26 @@ nvptx_output_call_insn (rtx_insn *insn,
   static int labelno;
   bool needs_tgt = register_operand (callee, Pmode);
   rtx pat = PATTERN (insn);
-  int nargs = XVECLEN (pat, 0) - 1;
+  int arg_end = XVECLEN (pat, 0);
+  int arg_start = 1;
   tree decl = NULL_TREE;
+  rtx partitioning = NULL_RTX;
 
-  fprintf (asm_out_file, "\t{\n");
-  if (result != NULL)
+  if (arg_end > 1)
     {
-      fprintf (asm_out_file, "\t\t.param%s %%retval_in;\n",
-	       nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
-					 false));
+      partitioning = XVECEXP (pat, 0, 1);
+      if (GET_CODE (partitioning) == CONST_INT)
+	arg_start++;
+      else
+	partitioning = NULL_RTX;
     }
 
+  fprintf (asm_out_file, "\t{\n");
+  if (result != NULL)
+    fprintf (asm_out_file, "\t\t.param%s %%retval_in;\n",
+	     nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result)),
+				       false));
+
   /* Ensure we have a ptx declaration in the output if necessary.  */
   if (GET_CODE (callee) == SYMBOL_REF)
     {
@@ -1845,20 +1873,20 @@ nvptx_output_call_insn (rtx_insn *insn,
       fputs (s.str().c_str(), asm_out_file);
     }
 
-  for (int i = 0, argno = 0; i < nargs; i++)
+  for (int i = arg_start, argno = 0; i < arg_end; i++)
     {
-      rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
+      rtx t = XEXP (XVECEXP (pat, 0, i), 0);
       machine_mode mode = GET_MODE (t);
       int count = maybe_split_mode (&mode);
 
-      while (count-- > 0)
+      while (count--)
 	fprintf (asm_out_file, "\t\t.param%s %%out_arg%d%s;\n",
 		 nvptx_ptx_type_from_mode (mode, false), argno++,
 		 mode == QImode || mode == HImode ? "[1]" : "");
     }
-  for (int i = 0, argno = 0; i < nargs; i++)
+  for (int i = arg_start, argno = 0; i < arg_end; i++)
     {
-      rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
+      rtx t = XEXP (XVECEXP (pat, 0, i), 0);
       gcc_assert (REG_P (t));
       machine_mode mode = GET_MODE (t);
       int count = maybe_split_mode (&mode);
@@ -1870,7 +1898,7 @@ nvptx_output_call_insn (rtx_insn *insn,
       else
 	{
 	  int n = 0;
-	  while (count-- > 0)
+	  while (count--)
 	    fprintf (asm_out_file, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n",
 		     nvptx_ptx_type_from_mode (mode, false), argno++,
 		     REGNO (t), n++);
@@ -1890,33 +1918,30 @@ nvptx_output_call_insn (rtx_insn *insn,
   else
     output_address (callee);
 
-  if (nargs > 0 || (decl && DECL_STATIC_CHAIN (decl)))
+  if (arg_end > arg_start || (decl && DECL_STATIC_CHAIN (decl)))
     {
+      const char *comma = "";
+      
       fprintf (asm_out_file, ", (");
-      int i, argno;
-      for (i = 0, argno = 0; i < nargs; i++)
+      for (int i = arg_start, argno = 0; i < arg_end; i++)
 	{
-	  rtx t = XEXP (XVECEXP (pat, 0, i + 1), 0);
+	  rtx t = XEXP (XVECEXP (pat, 0, i), 0);
 	  machine_mode mode = GET_MODE (t);
 	  int count = maybe_split_mode (&mode);
 
-	  while (count-- > 0)
+	  while (count--)
 	    {
-	      fprintf (asm_out_file, "%%out_arg%d", argno++);
-	      if (i + 1 < nargs || count > 0)
-		fprintf (asm_out_file, ", ");
+	      fprintf (asm_out_file, "%s%%out_arg%d", comma, argno++);
+	      comma = ", ";
 	    }
 	}
       if (decl && DECL_STATIC_CHAIN (decl))
-	{
-	  if (i > 0)
-	    fprintf (asm_out_file, ", ");
-	  fprintf (asm_out_file, "%s",
-		   reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
-	}
+	fprintf (asm_out_file, "%s%s", comma,
+		 reg_names [OUTGOING_STATIC_CHAIN_REGNUM]);
 
       fprintf (asm_out_file, ")");
     }
+
   if (needs_tgt)
     {
       fprintf (asm_out_file, ", ");
Index: gcc/config/nvptx/nvptx.md
===================================================================
--- gcc/config/nvptx/nvptx.md	(revision 226981)
+++ gcc/config/nvptx/nvptx.md	(working copy)
@@ -211,9 +211,14 @@
 (define_predicate "call_operation"
   (match_code "parallel")
 {
-  int i;
+  int arg_start = 1;
+  int arg_end = XVECLEN (op, 0);
 
-  for (i = 1; i < XVECLEN (op, 0); i++)
+  /* Skip optional routine partitioning information.  */
+  if (arg_end > 1 && GET_CODE (XVECEXP (op, 0, 1)) == CONST_INT)
+    arg_start++;
+
+  for (int i = arg_start; i < arg_end; i++)
     {
       rtx elt = XVECEXP (op, 0, i);
 
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 226981)
+++ gcc/omp-low.c	(working copy)
@@ -9372,8 +9372,9 @@ set_oacc_fn_attrib (tree fn, tree clause
     value.  Issue diagnostics as appropriate.  We default to SEQ
     (OpenACC 2.5 clarifies this). All dimensions have a size of zero
     (dynamic).  TREE_PURPOSE is set to indicate whether that dimension
-    can have a loop partitioned on it.  boolean_true_node indicates
-    yes, boolean_false_node indicates no.  */
+    can have a loop partitioned on it.  non-zero indicates
+    yes, zero indicates no.  By construction once a non-zero has been
+    reached, further inner dimensions must also be non-zero.  */
 
 tree
 build_oacc_routine_dims (tree clauses)
@@ -9395,6 +9396,7 @@ build_oacc_routine_dims (tree clauses)
 	  break;
 	}
 
+  /* Default to SEQ.  */
   if (level < 0)
     level = GOMP_DIM_MAX;
   

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2015-08-20 17:38 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-20 17:39 [gomp4] routine calls Nathan Sidwell

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).