public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Bernd Schmidt <bernds@codesourcery.com>
To: GCC Patches <gcc-patches@gcc.gnu.org>, Jakub Jelinek <jakub@redhat.com>
Subject: [gomp4] Vector-single predication
Date: Thu, 21 May 2015 12:21:00 -0000	[thread overview]
Message-ID: <555DC493.2050208@codesourcery.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 755 bytes --]

This uses the patch I committed yesterday which introduces warp 
broadcasts to implement the vector-single predication needed for 
OpenACC. Outside a loop with vector parallelism, only one of the threads 
representing a vector must execute; the others follow along. So we skip 
the real work in each basic block for the inactive threads, then 
broadcast the direction to take in the control flow graph from the 
active one, and jump as a group.

This will get extended with similar functionality for worker-single. 
Julian is working on some patches on top of that to ensure the later 
optimizers don't destroy the control flow - we really need the threads 
to reconverge and perform the broadcast/jump in lockstep.

Committed on gomp-4_0-branch.


Bernd

[-- Attachment #2: pred-vector.diff --]
[-- Type: text/x-patch, Size: 12786 bytes --]

Index: gcc/ChangeLog.gomp
===================================================================
--- gcc/ChangeLog.gomp	(revision 223444)
+++ gcc/ChangeLog.gomp	(working copy)
@@ -1,5 +1,15 @@
 2015-05-20  Bernd Schmidt  <bernds@codesourcery.com>
 
+	* omp-low.c (struct omp_region): Add a gwv_this field.
+	(bb_region_map): New variable.
+	(find_omp_for_region_data, find_omp_target_region_data): New static
+	functions.
+	(build_omp_regions_1): Call them.  Build the bb_region_map.
+	(enclosing_target_region, requires_vector_predicate,
+	generate_vector_broadcast, predicate_bb, find_predicatable_bbs,
+	predicate_omp_regions): New static functions.
+	(execute_expand_omp): Allocate and free bb_region_map.
+
 	* config/nvptx/nvptx.c: Include "dumpfile,h".
 	(condition_unidirectional_p): New static function.
 	(nvptx_print_operand): Use it for new 'U' handling.
Index: gcc/omp-low.c
===================================================================
--- gcc/omp-low.c	(revision 223442)
+++ gcc/omp-low.c	(working copy)
@@ -159,6 +159,9 @@ struct omp_region
 
   /* True if this is a combined parallel+workshare region.  */
   bool is_combined_parallel;
+
+  /* Mask of OpenACC parallelism levels requested by a loop or target.  */
+  int gwv_this;
 };
 
 /* Levels of parallelism as defined by OpenACC.  Increasing numbers
@@ -9961,7 +9964,6 @@ expand_omp_target (struct omp_region *re
     update_ssa (TODO_update_ssa_only_virtuals);
 }
 
-
 /* Expand the parallel region tree rooted at REGION.  Expansion
    proceeds in depth-first order.  Innermost regions are expanded
    first.  This way, parallel regions that require a new function to
@@ -9984,7 +9986,7 @@ expand_omp (struct omp_region *region)
       if (region->type == GIMPLE_OMP_FOR
 	  && gimple_omp_for_combined_p (last_stmt (region->entry)))
 	inner_stmt = last_stmt (region->inner->entry);
-
+     
       if (region->inner)
 	expand_omp (region->inner);
 
@@ -10041,6 +10043,44 @@ expand_omp (struct omp_region *region)
     }
 }
 
+/* Map each basic block to the omp_region containing it, or NULL if none.  */
+static hash_map<basic_block, omp_region *> *bb_region_map;
+
+/* If STMT, the OMP_FOR associated with REGION, is an OpenACC statement, set
+   the MASK_* bit in REGION->gwv_this for each gang/worker/vector clause.  */
+
+static void
+find_omp_for_region_data (struct omp_region *region, gimple stmt)
+{
+  if (!is_gimple_omp_oacc (stmt))
+    return;
+
+  tree clauses = gimple_omp_for_clauses (stmt);
+  if (find_omp_clause (clauses, OMP_CLAUSE_GANG))
+    region->gwv_this |= MASK_GANG;
+  if (find_omp_clause (clauses, OMP_CLAUSE_WORKER))
+    region->gwv_this |= MASK_WORKER;
+  if (find_omp_clause (clauses, OMP_CLAUSE_VECTOR))
+    region->gwv_this |= MASK_VECTOR;
+}
+
+/* If STMT, the OMP_TARGET associated with REGION, is OpenACC, set the MASK_*
+   bits in REGION->gwv_this for num_gangs/num_workers/vector_length.  */
+
+static void
+find_omp_target_region_data (struct omp_region *region, gimple stmt)
+{
+  if (!is_gimple_omp_oacc (stmt))
+    return;
+
+  tree clauses = gimple_omp_target_clauses (stmt);
+  if (find_omp_clause (clauses, OMP_CLAUSE_NUM_GANGS))
+    region->gwv_this |= MASK_GANG;
+  if (find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS))
+    region->gwv_this |= MASK_WORKER;
+  if (find_omp_clause (clauses, OMP_CLAUSE_VECTOR_LENGTH))
+    region->gwv_this |= MASK_VECTOR;
+}
 
 /* Helper for build_omp_regions.  Scan the dominator tree starting at
    block BB.  PARENT is the region that contains BB.  If SINGLE_TREE is
@@ -10055,6 +10095,8 @@ build_omp_regions_1 (basic_block bb, str
   gimple stmt;
   basic_block son;
 
+  bb_region_map->put (bb, parent);
+
   gsi = gsi_last_bb (bb);
   if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
     {
@@ -10107,6 +10149,7 @@ build_omp_regions_1 (basic_block bb, str
 		case GF_OMP_TARGET_KIND_OACC_PARALLEL:
 		case GF_OMP_TARGET_KIND_OACC_KERNELS:
 		case GF_OMP_TARGET_KIND_OACC_DATA:
+		  find_omp_target_region_data (region, stmt);
 		  break;
 		case GF_OMP_TARGET_KIND_UPDATE:
 		case GF_OMP_TARGET_KIND_OACC_UPDATE:
@@ -10118,6 +10161,8 @@ build_omp_regions_1 (basic_block bb, str
 		  gcc_unreachable ();
 		}
 	    }
+	  else if (code == GIMPLE_OMP_FOR)
+	    find_omp_for_region_data (region, stmt);
 	  /* ..., this directive becomes the parent for a new region.  */
 	  if (region)
 	    parent = region;
@@ -10156,7 +10201,7 @@ omp_expand_local (basic_block head)
       dump_omp_region (dump_file, root_omp_region, 0);
       fprintf (dump_file, "\n");
     }
-
+  
   remove_exit_barriers (root_omp_region);
   expand_omp (root_omp_region);
 
@@ -10174,31 +10219,264 @@ build_omp_regions (void)
   build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
 }
 
+/* Return the nearest GIMPLE_OMP_TARGET region at or above REGION in the
+   region tree (following ->outer), or NULL if there is none.  */
+static omp_region *
+enclosing_target_region (omp_region *region)
+{
+  while (region != NULL
+	 && region->type != GIMPLE_OMP_TARGET)
+    region = region->outer;
+  return region;
+}
+
+/* Return true if basic blocks in REGION require OpenACC vector predication:
+   nearest OMP_FOR/OMP_TARGET is a non-vector loop in a vector target.  */
+static bool
+requires_vector_predicate (struct omp_region *region)
+{
+  while (region
+	 && region->type != GIMPLE_OMP_FOR && region->type != GIMPLE_OMP_TARGET)
+    region = region->outer;
+  if (!region)
+    return false;
+  omp_region *outer_target = enclosing_target_region (region);
+  if (!outer_target || (outer_target->gwv_this & MASK_VECTOR) == 0)
+    return false;
+  if (region->type == GIMPLE_OMP_FOR && (region->gwv_this & MASK_VECTOR) == 0)
+    return true;
+  return false;
+}
+
+/* Generate a broadcast across OpenACC vector threads (a warp on GPUs)
+   so that VAR is broadcast to DEST_VAR, converting to and from the
+   builtin's unsigned type as needed.  Statements are added after WHERE.  */
+static void
+generate_vector_broadcast (tree dest_var, tree var,
+			   gimple_stmt_iterator &where)
+{
+  tree vartype = TREE_TYPE (var);
+  tree call_arg_type = unsigned_type_node;
+  enum built_in_function fn = BUILT_IN_GOACC_THREAD_BROADCAST;
+  /* Values wider than unsigned int use the long long builtin variant.  */
+  if (TYPE_PRECISION (vartype) > TYPE_PRECISION (call_arg_type))
+    {
+      fn = BUILT_IN_GOACC_THREAD_BROADCAST_LL;
+      call_arg_type = long_long_unsigned_type_node;
+    }
+  bool need_conversion = !types_compatible_p (vartype, call_arg_type);
+  tree casted_var = var;
+  if (need_conversion)
+    {
+      casted_var = create_tmp_var (call_arg_type);
+      gassign *conv1 = gimple_build_assign (casted_var, NOP_EXPR, var);
+      gsi_insert_after (&where, conv1, GSI_CONTINUE_LINKING);
+    }
+
+  tree decl = builtin_decl_explicit (fn);
+  gimple call = gimple_build_call (decl, 1, casted_var);
+  gsi_insert_after (&where, call, GSI_NEW_STMT);
+  tree casted_dest = dest_var;
+  if (need_conversion)
+    {
+      /* The call writes a temporary, which is then converted to DEST_VAR.  */
+      casted_dest = create_tmp_var (call_arg_type);
+      gassign *conv2 = gimple_build_assign (dest_var, NOP_EXPR,
+					    casted_dest);
+      gsi_insert_after (&where, conv2, GSI_CONTINUE_LINKING);
+    }
+  gimple_call_set_lhs (call, casted_dest);
+}
+
+/* Apply OpenACC vector predication to basic block BB in region PARENT: guard
+   BB's work with a GOACC_TID == 0 test; broadcast cond value/switch index.  */
+
+static void
+predicate_bb (basic_block bb, struct omp_region *parent)
+{
+  if (!requires_vector_predicate (parent))
+    return;
+
+  gimple_stmt_iterator gsi;
+  gimple stmt;
+
+  gsi = gsi_last_bb (bb);
+  stmt = gsi_stmt (gsi);
+  if (stmt == NULL)
+    return;
+
+  basic_block skip_dest_bb = NULL;
+  basic_block *adjust_bb_ptr = NULL;
+
+  if (gimple_code (stmt) == GIMPLE_COND)
+    {
+      tree cond_var = create_tmp_var (boolean_type_node);
+      tree broadcast_cond = create_tmp_var (boolean_type_node);
+      gassign *asgn = gimple_build_assign (cond_var,
+					   gimple_cond_code (stmt),
+					   gimple_cond_lhs (stmt),
+					   gimple_cond_rhs (stmt));
+      gsi_insert_before (&gsi, asgn, GSI_CONTINUE_LINKING);
+      gimple_stmt_iterator gsi_asgn = gsi_for_stmt (asgn);
+
+      generate_vector_broadcast (broadcast_cond, cond_var, gsi_asgn);
+
+      edge e = split_block (bb, asgn);
+      skip_dest_bb = e->dest;
+
+      gimple_cond_set_condition (as_a <gcond *> (stmt), EQ_EXPR,
+				 broadcast_cond, boolean_true_node);
+    }
+  else if (gimple_code (stmt) == GIMPLE_SWITCH)
+    {
+      gswitch *sstmt = as_a <gswitch *> (stmt);
+      tree var = gimple_switch_index (sstmt);
+      tree new_var = create_tmp_var (TREE_TYPE (var));
+
+      gassign *asgn = gimple_build_assign (new_var, var);
+      gsi_insert_before (&gsi, asgn, GSI_CONTINUE_LINKING);
+      gimple_stmt_iterator gsi_asgn = gsi_for_stmt (asgn);
+
+      generate_vector_broadcast (new_var, var, gsi_asgn);
+
+      edge e = split_block (bb, asgn);
+      skip_dest_bb = e->dest;
+
+      gimple_switch_set_index (sstmt, new_var);
+    }
+  else if (is_gimple_omp (stmt))
+    {
+      gsi_prev (&gsi);
+      /* OMP_FOR and OMP_CONTINUE are split off into their own block.  */
+      if (gimple_code (stmt) != GIMPLE_OMP_FOR
+	  && gimple_code (stmt) != GIMPLE_OMP_CONTINUE)
+	{
+	  edge e = single_succ_edge (bb);
+	  skip_dest_bb = e->dest;
+	  if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
+	    {
+	      gcc_assert (parent->exit == bb);
+	      adjust_bb_ptr = &parent->exit;
+	    }
+	}
+      else
+	{
+	  gimple split_stmt = gsi_stmt (gsi);
+	  if (!split_stmt)
+	    return;
+	  edge e = split_block (bb, split_stmt);
+	  skip_dest_bb = e->dest;
+	  if (gimple_code (stmt) == GIMPLE_OMP_CONTINUE)
+	    {
+	      gcc_assert (parent->cont == bb);
+	      parent->cont = skip_dest_bb;
+	    }
+	  else if (gimple_code (stmt) == GIMPLE_OMP_FOR)
+	    {
+	      omp_region *inner;
+	      inner = *bb_region_map->get (FALLTHRU_EDGE (skip_dest_bb)->dest);
+	      gcc_assert (inner->entry == bb);
+	      inner->entry = skip_dest_bb;
+	    }
+	}
+    }
+  else if (single_succ_p (bb))
+    {
+      edge e = single_succ_edge (bb);
+      skip_dest_bb = e->dest;
+      if (gimple_code (stmt) == GIMPLE_GOTO)
+	gsi_prev (&gsi);
+      if (gsi_stmt (gsi) == 0)
+	return;
+    }
+
+  if (skip_dest_bb != NULL)
+    {
+      gimple_stmt_iterator head_gsi = gsi_start_bb (bb);
+      gsi_prev (&head_gsi);
+      edge e2 = split_block (bb, gsi_stmt (head_gsi));
+      basic_block cond_bb = e2->src;
+
+      if (adjust_bb_ptr)
+	*adjust_bb_ptr = e2->dest;
+
+      gimple_stmt_iterator tmp_gsi = gsi_last_bb (cond_bb);
+
+      tree decl = builtin_decl_explicit (BUILT_IN_GOACC_TID);
+      gimple call = gimple_build_call (decl, 1, integer_zero_node);
+      tree tmp_var = create_tmp_var (unsigned_type_node);
+      gimple_call_set_lhs (call, tmp_var);
+
+      gsi_insert_after (&tmp_gsi, call, GSI_NEW_STMT);
+
+      tree cond = build2 (EQ_EXPR, boolean_type_node, tmp_var,
+			  fold_convert (unsigned_type_node, integer_zero_node));
+      gimple cond_stmt = gimple_build_cond_empty (cond);
+      gsi_insert_after (&tmp_gsi, cond_stmt, GSI_CONTINUE_LINKING);
+
+      e2->flags = EDGE_TRUE_VALUE;
+      make_edge (cond_bb, skip_dest_bb, EDGE_FALSE_VALUE);  /* tid != 0: skip.  */
+    }
+}
+
+/* Walk the dominator tree from BB in pre-order, pushing onto WORKLIST every
+   block whose bb_region_map region satisfies requires_vector_predicate.  */
+static void
+find_predicatable_bbs (basic_block bb, vec<basic_block> &worklist)
+{
+  struct omp_region *parent = *bb_region_map->get (bb);
+  if (requires_vector_predicate (parent))
+    worklist.safe_push (bb);
+  basic_block son;
+  for (son = first_dom_son (CDI_DOMINATORS, bb);
+       son;
+       son = next_dom_son (CDI_DOMINATORS, son))
+    find_predicatable_bbs (son, worklist);
+}
+
+/* Apply OpenACC vector predication to every block in the dominator tree
+   rooted at HEAD_BB that requires it.  */
+static void
+predicate_omp_regions (basic_block head_bb)
+{
+  auto_vec<basic_block> worklist;  /* Storage is released on scope exit.  */
+  find_predicatable_bbs (head_bb, worklist);
+  int i;
+  basic_block bb;
+  FOR_EACH_VEC_ELT (worklist, i, bb)
+    predicate_bb (bb, *bb_region_map->get (bb));
+}
+
 /* Main entry point for expanding OMP-GIMPLE into runtime calls.  */
 
 static unsigned int
 execute_expand_omp (void)
 {
-  build_omp_regions ();
+  bb_region_map = new hash_map<basic_block, omp_region *>;
 
-  if (!root_omp_region)
-    return 0;
+  build_omp_regions ();
 
-  if (dump_file)
+  if (root_omp_region)
     {
-      fprintf (dump_file, "\nOMP region tree\n\n");
-      dump_omp_region (dump_file, root_omp_region, 0);
-      fprintf (dump_file, "\n");
-    }
+      if (dump_file)
+	{
+	  fprintf (dump_file, "\nOMP region tree\n\n");
+	  dump_omp_region (dump_file, root_omp_region, 0);
+	  fprintf (dump_file, "\n");
+	}
 
-  remove_exit_barriers (root_omp_region);
+      predicate_omp_regions (ENTRY_BLOCK_PTR_FOR_FN (cfun));
 
-  expand_omp (root_omp_region);
+      remove_exit_barriers (root_omp_region);
 
-  cleanup_tree_cfg ();
+      expand_omp (root_omp_region);
 
-  free_omp_regions ();
+      cleanup_tree_cfg ();
 
+      free_omp_regions ();
+    }
+
+  delete bb_region_map;
   return 0;
 }
 

             reply	other threads:[~2015-05-21 11:42 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-05-21 12:21 Bernd Schmidt [this message]
2015-05-21 12:23 ` Jakub Jelinek
2015-05-21 13:16   ` Julian Brown
2015-05-21 13:29     ` Jakub Jelinek
2015-05-21 13:42       ` Julian Brown
2015-05-21 13:45         ` Julian Brown
2015-05-21 13:51         ` Jakub Jelinek

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=555DC493.2050208@codesourcery.com \
    --to=bernds@codesourcery.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=jakub@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).