public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][Cilkplus] Elemental function cloning with vector params.
@ 2012-04-05 22:37 Iyer, Balaji V
  0 siblings, 0 replies; only message in thread
From: Iyer, Balaji V @ 2012-04-05 22:37 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 294 bytes --]

Hello Everyone,
    This patch is for the Cilkplus branch and mainly affects the C and C++ compilers. It clones a function that is marked as an elemental function and passes the parameters and return values in the appropriate vector registers.

Thanking You,

Yours Sincerely,

Balaji V. Iyer. 

[-- Attachment #2: patch_elem_fn_cloning_fn_for_C.txt --]
[-- Type: text/plain, Size: 18176 bytes --]

diff --git a/gcc/ChangeLog.cilk b/gcc/ChangeLog.cilk
index 8bfb689..6b7c176 100644
--- a/gcc/ChangeLog.cilk
+++ b/gcc/ChangeLog.cilk
@@ -1,3 +1,17 @@
+2012-04-05  Balaji V. Iyer  <balaji.v.iyer@intel.com>
+
+	* config/i386/i386.c (type_natural_mode): Added a flag_enable_cilk
+	check.
+	(ix86_function_arg_boundary): Likewise.
+	* expr.c (expand_expr_real_1): Likewise.
+	* elem-function.c (create_processor_attribute): Added avx to target
+	string.
+	(create_elem_fn_nodes): Called copy_node on new_decl's type.  Replaced
+	tree_function_versioning call with tree_elem_fn_versioning.
+	* tree-inline.c (elem_fn_add_local_variables): New function.
+	(elem_fn_copy_arguments_for_versioning): Likewise.
+	(tree_elem_fn_versioning): Likewise.
+
 2012-03-20  Balaji V. Iyer  <balaji.v.iyer@intel.com>
 
 	* elem-function.c (extract_elem_fn_values): Initialized proc_type field
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 7262859..66fd3c6 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -584,6 +584,8 @@ struct cgraph_node *cgraph_function_versioning (struct cgraph_node *,
 						basic_block, const char *);
 void tree_function_versioning (tree, tree, VEC (ipa_replace_map_p,gc)*,
 			       bool, bitmap, bool, bitmap, basic_block);
+void tree_elem_fn_versioning (tree, tree, VEC (ipa_replace_map_p,gc)*,
+			      bool, bitmap, bool, bitmap, basic_block, int);
 void record_references_in_initializer (tree, bool);
 bool cgraph_process_new_functions (void);
 void cgraph_process_same_body_aliases (void);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 92604ae..754d53d 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5816,9 +5816,12 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
 			&& !warnedavx
 			&& cum->warn_avx)
 		      {
-			warnedavx = true;
-			warning (0, "AVX vector argument without AVX "
-				 "enabled changes the ABI");
+			if (!flag_enable_cilk)
+			  {
+			    warnedavx = true;
+			    warning (0, "AVX vector argument without AVX "
+				     "enabled changes the ABI");
+			  }
 		      }
 		    return TYPE_MODE (type);
 		  }
@@ -7203,11 +7206,14 @@ ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
 	  && align != ix86_compat_function_arg_boundary (mode, type,
 							 saved_align))
 	{
-	  warned = true;
-	  inform (input_location,
-		  "The ABI for passing parameters with %d-byte"
-		  " alignment has changed in GCC 4.6",
-		  align / BITS_PER_UNIT);
+	  if (!flag_enable_cilk)
+	    {
+	      warned = true;
+	      inform (input_location,
+		      "The ABI for passing parameters with %d-byte"
+		      " alignment has changed in GCC 4.6",
+		      align / BITS_PER_UNIT);
+	    }
 	}
     }
 
diff --git a/gcc/elem-function.c b/gcc/elem-function.c
index a6bc4e3..dd3b75b 100644
--- a/gcc/elem-function.c
+++ b/gcc/elem-function.c
@@ -265,6 +265,8 @@ create_processor_attribute (elem_fn_info *elem_fn_values, tree *opposite_attr)
 		     build_string (strlen ("arch=corei7"), "arch=corei7"));
       VEC_safe_push (tree, gc, proc_vec_list,
 		     build_string (strlen ("sse4.2"), "sse4.2"));
+      VEC_safe_push (tree, gc, proc_vec_list,
+		     build_string (strlen ("avx"), "avx"));
       if (opposite_attr)
 	{
 	  VEC_safe_push (tree, gc, opp_proc_vec_list,
@@ -307,7 +309,7 @@ create_optimize_attribute (int option)
   opt_attr = build_tree_list (get_identifier ("optimize"), opt_attr);
   return opt_attr;
 }
-  
+
 /* this function will find the appropriate mangling suffix for the vector
  * function */
 static char *
@@ -362,6 +364,7 @@ create_elem_fn_nodes (struct cgraph_node *node)
   
   old_decl = node->decl;
   new_decl = copy_node (old_decl);
+  TREE_TYPE (new_decl) = copy_node (TREE_TYPE (old_decl));
   elem_fn_values = extract_elem_fn_values (old_decl);
 
   if (elem_fn_values)
@@ -381,8 +384,8 @@ create_elem_fn_nodes (struct cgraph_node *node)
   new_node->local.externally_visible = node->local.externally_visible;
   new_node->lowered = true;
 
-  tree_function_versioning (old_decl, new_decl, NULL, false, NULL, false, NULL,
-			    NULL);
+  tree_elem_fn_versioning (old_decl, new_decl, NULL, false, NULL, false, NULL,
+			   NULL, elem_fn_values->vectorlength[0]);
   cgraph_call_function_insertion_hooks (new_node);
   DECL_STRUCT_FUNCTION (new_decl)->elem_fn_already_cloned = true;
   DECL_STRUCT_FUNCTION (new_decl)->curr_properties = cfun->curr_properties;
diff --git a/gcc/expr.c b/gcc/expr.c
index eaf67a1..920f9b4 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9137,7 +9137,9 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
 	    }
 	  else
 	    pmode = promote_decl_mode (exp, &unsignedp);
-	  gcc_assert (GET_MODE (decl_rtl) == pmode);
+
+	  if (!flag_enable_cilk)
+	    gcc_assert (GET_MODE (decl_rtl) == pmode);
 
 	  temp = gen_lowpart_SUBREG (mode, decl_rtl);
 	  SUBREG_PROMOTED_VAR_P (temp) = 1;
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index b060ae2..c2d8c70 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3777,6 +3777,48 @@ add_local_variables (struct function *callee, struct function *caller,
       }
 }
 
+/* Add CALLEE's locals to CALLER; remapped ones get a VLENGTH vector type.  */
+
+static inline void
+elem_fn_add_local_variables (struct function *callee, struct function *caller,
+			     copy_body_data *id, bool check_var_ann,
+			     int vlength)
+{
+  tree var;
+  unsigned ix;
+
+  FOR_EACH_LOCAL_DECL (callee, ix, var)
+    if (TREE_STATIC (var) && !TREE_ASM_WRITTEN (var))
+      {
+	if (!check_var_ann
+	    || (var_ann (var) && add_referenced_var (var)))
+	  add_local_decl (caller, var);
+      }
+    else if (!can_be_nonlocal (var, id))
+      {
+        tree new_var = remap_decl (var, id);
+
+        /* Remap debug-expressions.  */
+	if (TREE_CODE (new_var) == VAR_DECL
+	    && DECL_DEBUG_EXPR_IS_FROM (new_var)
+	    && new_var != var)
+	  {
+	    tree tem = DECL_DEBUG_EXPR (var);
+	    bool old_regimplify = id->regimplify;
+	    id->remapping_type_depth++;
+	    walk_tree (&tem, copy_tree_body_r, id, NULL);
+	    id->remapping_type_depth--;
+	    id->regimplify = old_regimplify;
+	    SET_DECL_DEBUG_EXPR (new_var, tem);
+	  }
+	TREE_TYPE (new_var) = copy_node (TREE_TYPE (new_var));
+	TREE_TYPE (new_var) =
+	  build_vector_type (copy_node (TREE_TYPE (new_var)), vlength);
+	DECL_GIMPLE_REG_P (new_var) = 1;
+ 	add_local_decl (caller, new_var);
+      }
+}
+
 /* If STMT is a GIMPLE_CALL, replace it with its inline expansion.  */
 
 static bool
@@ -4925,6 +4967,52 @@ copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
   return new_parm;
 }
 
+/* Return a copy of the argument chain; kept parms get VLENGTH vector types.  */
+static tree
+elem_fn_copy_arguments_for_versioning (tree orig_parm, copy_body_data * id,
+				       bitmap args_to_skip, tree *vars,
+				       int vlength)
+{
+  tree arg, *parg;
+  tree new_parm = NULL;
+  int i = 0;
+
+  parg = &new_parm;
+
+  for (arg = orig_parm; arg; arg = DECL_CHAIN (arg), i++)
+    if (!args_to_skip || !bitmap_bit_p (args_to_skip, i))
+      {
+        tree new_tree = remap_decl (arg, id);
+	if (TREE_CODE (new_tree) != PARM_DECL)
+	  new_tree = id->copy_decl (arg, id);
+	/* Vectorize TREE_TYPE and DECL_ARG_TYPE using VLENGTH.  */
+	TREE_TYPE (new_tree) = copy_node (TREE_TYPE (new_tree));
+	TREE_TYPE (new_tree) =
+	  build_vector_type (TREE_TYPE (new_tree), vlength);
+	DECL_ARG_TYPE (new_tree) =
+	  build_vector_type (DECL_ARG_TYPE (new_tree), vlength);
+	DECL_GIMPLE_REG_P (new_tree) = 1;
+        lang_hooks.dup_lang_specific_decl (new_tree);
+        *parg = new_tree;
+	parg = &DECL_CHAIN (new_tree);
+      }
+    else if (!pointer_map_contains (id->decl_map, arg))
+      {
+	/* Make an equivalent VAR_DECL.  If the argument was used
+	   as a temporary variable later in the function, the uses will be
+	   replaced by the local variable.  */
+	tree var = copy_decl_to_var (arg, id);
+	add_referenced_var (var);
+	insert_decl_map (id, arg, var);
+        /* Declare this new variable.  */
+        DECL_CHAIN (var) = *vars;
+        *vars = var;
+      }
+  return new_parm;
+}
+
+
+
 /* Return a copy of the function's static chain.  */
 static tree
 copy_static_chain (tree static_chain, copy_body_data * id)
@@ -5333,6 +5421,290 @@ tree_function_versioning (tree old_decl, tree new_decl,
   return;
 }
 
+void
+tree_elem_fn_versioning (tree old_decl, tree new_decl,
+			 VEC(ipa_replace_map_p,gc)* tree_map,
+			 bool update_clones, bitmap args_to_skip,
+			 bool skip_return, bitmap blocks_to_copy,
+			 basic_block new_entry, int vlength)
+{
+  struct cgraph_node *old_version_node;
+  struct cgraph_node *new_version_node;
+  copy_body_data id;
+  tree p;
+  unsigned i;
+  struct ipa_replace_map *replace_info;
+  basic_block old_entry_block, bb;
+  VEC (gimple, heap) *init_stmts = VEC_alloc (gimple, heap, 10);
+
+  tree old_current_function_decl = current_function_decl;
+  tree vars = NULL_TREE;
+  /* Clone OLD_DECL's body into NEW_DECL, using VLENGTH-based vector types.  */
+  gcc_assert (TREE_CODE (old_decl) == FUNCTION_DECL
+	      && TREE_CODE (new_decl) == FUNCTION_DECL);
+  DECL_POSSIBLY_INLINED (old_decl) = 1;
+
+  old_version_node = cgraph_get_node (old_decl);
+  gcc_checking_assert (old_version_node);
+  new_version_node = cgraph_get_node (new_decl);
+  gcc_checking_assert (new_version_node);
+  /* Give a non-void return type a vector type built with VLENGTH.  */
+  if (TREE_TYPE (TREE_TYPE (old_decl)) != void_type_node)
+    {
+      TREE_TYPE (TREE_TYPE (new_decl)) =
+	copy_node (TREE_TYPE (TREE_TYPE (old_decl)));
+      TREE_TYPE (TREE_TYPE (new_decl)) =
+	build_vector_type (TREE_TYPE (TREE_TYPE (new_decl)), vlength);
+    }
+  
+  
+  /* Copy over debug args.  */
+  if (DECL_HAS_DEBUG_ARGS_P (old_decl))
+    {
+      VEC(tree, gc) **new_debug_args, **old_debug_args;
+      gcc_checking_assert (decl_debug_args_lookup (new_decl) == NULL);
+      DECL_HAS_DEBUG_ARGS_P (new_decl) = 0;
+      old_debug_args = decl_debug_args_lookup (old_decl);
+      if (old_debug_args)
+	{
+	  new_debug_args = decl_debug_args_insert (new_decl);
+	  *new_debug_args = VEC_copy (tree, gc, *old_debug_args);
+	}
+    }
+
+  /* Output the inlining info for this abstract function, since it has been
+     inlined.  If we don't do this now, we can lose the information about the
+     variables in the function when the blocks get blown away as soon as we
+     remove the cgraph node.  */
+  (*debug_hooks->outlining_inline_function) (old_decl);
+
+  DECL_ARTIFICIAL (new_decl) = 1;
+  DECL_ABSTRACT_ORIGIN (new_decl) = DECL_ORIGIN (old_decl);
+  DECL_FUNCTION_PERSONALITY (new_decl) = DECL_FUNCTION_PERSONALITY (old_decl);
+
+  /* Prepare the data structures for the tree copy.  */
+  memset (&id, 0, sizeof (id));
+
+  /* Create the set of statements to fold after copying.  */
+  id.statements_to_fold = pointer_set_create ();
+
+  id.decl_map = pointer_map_create ();
+  id.debug_map = NULL;
+  id.src_fn = old_decl;
+  id.dst_fn = new_decl;
+  id.src_node = old_version_node;
+  id.dst_node = new_version_node;
+  id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
+  if (id.src_node->ipa_transforms_to_apply)
+    {
+      VEC(ipa_opt_pass,heap) * old_transforms_to_apply =
+	id.dst_node->ipa_transforms_to_apply;
+      unsigned int i;
+
+      id.dst_node->ipa_transforms_to_apply =
+	VEC_copy (ipa_opt_pass, heap, id.src_node->ipa_transforms_to_apply);
+      for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
+        VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
+		       VEC_index (ipa_opt_pass,
+		       		  old_transforms_to_apply,
+				  i));
+    }
+
+  id.copy_decl = copy_decl_no_change;
+  id.transform_call_graph_edges
+    = update_clones ? CB_CGE_MOVE_CLONES : CB_CGE_MOVE;
+  id.transform_new_cfg = true;
+  id.transform_return_to_modify = false;
+  id.transform_lang_insert_block = NULL;
+
+  current_function_decl = new_decl;
+  old_entry_block = ENTRY_BLOCK_PTR_FOR_FUNCTION
+    (DECL_STRUCT_FUNCTION (old_decl));
+  initialize_cfun (new_decl, old_decl,
+		   old_entry_block->count);
+  DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta
+    = id.src_cfun->gimple_df->ipa_pta;
+  push_cfun (DECL_STRUCT_FUNCTION (new_decl));
+
+  /* Copy the function's static chain.  */
+  p = DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl;
+  if (p)
+    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl =
+      copy_static_chain (DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl,
+			 &id);
+
+  /* If there's a tree_map, prepare for substitution.  */
+  if (tree_map)
+    for (i = 0; i < VEC_length (ipa_replace_map_p, tree_map); i++)
+      {
+	gimple init;
+	replace_info = VEC_index (ipa_replace_map_p, tree_map, i);
+	if (replace_info->replace_p)
+	  {
+	    tree op = replace_info->new_tree;
+	    if (!replace_info->old_tree)
+	      {
+		int i = replace_info->parm_num;
+		tree parm;
+		for (parm = DECL_ARGUMENTS (old_decl); i;
+		     parm = DECL_CHAIN (parm))
+		  i --;
+		replace_info->old_tree = parm;
+	      }
+		
+
+	    STRIP_NOPS (op);
+
+	    if (TREE_CODE (op) == VIEW_CONVERT_EXPR)
+	      op = TREE_OPERAND (op, 0);
+
+	    if (TREE_CODE (op) == ADDR_EXPR)
+	      {
+		op = TREE_OPERAND (op, 0);
+		while (handled_component_p (op))
+		  op = TREE_OPERAND (op, 0);
+		if (TREE_CODE (op) == VAR_DECL)
+		  add_referenced_var (op);
+	      }
+	    gcc_assert (TREE_CODE (replace_info->old_tree) == PARM_DECL);
+	    init = setup_one_parameter (&id, replace_info->old_tree,
+	    			        replace_info->new_tree, id.src_fn,
+				        NULL,
+				        &vars);
+	    if (init)
+	      VEC_safe_push (gimple, heap, init_stmts, init);
+	  }
+      }
+  /* Copy the function's arguments.  */
+  if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
+    DECL_ARGUMENTS (new_decl) =
+      elem_fn_copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id,
+					     args_to_skip, &vars, vlength);
+
+  DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
+  BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;
+
+  declare_inline_vars (DECL_INITIAL (new_decl), vars);
+
+  if (!VEC_empty (tree, DECL_STRUCT_FUNCTION (old_decl)->local_decls))
+    /* Add local vars.  */
+    elem_fn_add_local_variables (DECL_STRUCT_FUNCTION (old_decl), cfun, &id,
+				 false, vlength);
+
+  if (DECL_RESULT (old_decl) == NULL_TREE)
+    ;
+  else if (skip_return && !VOID_TYPE_P (TREE_TYPE (DECL_RESULT (old_decl))))
+    {
+      DECL_RESULT (new_decl)
+	= build_decl (DECL_SOURCE_LOCATION (DECL_RESULT (old_decl)),
+		      RESULT_DECL, NULL_TREE, void_type_node);
+      DECL_CONTEXT (DECL_RESULT (new_decl)) = new_decl;
+      cfun->returns_struct = 0;
+      cfun->returns_pcc_struct = 0;
+    }
+  else
+    {
+      tree old_name;
+      DECL_RESULT (new_decl) = remap_decl (DECL_RESULT (old_decl), &id);
+      /* Give a non-void result a VLENGTH vector type and matching mode.  */
+      if (TREE_TYPE (DECL_RESULT (new_decl)) != void_type_node)
+	{
+	  TREE_TYPE (DECL_RESULT (new_decl)) =
+	    build_vector_type (copy_node (TREE_TYPE (DECL_RESULT (new_decl))),
+			       vlength);
+	  DECL_MODE (DECL_RESULT (new_decl)) =
+	    TYPE_MODE (TREE_TYPE (DECL_RESULT (new_decl)));
+	}
+      lang_hooks.dup_lang_specific_decl (DECL_RESULT (new_decl));
+      if (gimple_in_ssa_p (id.src_cfun)
+	  && DECL_BY_REFERENCE (DECL_RESULT (old_decl))
+	  && (old_name
+	      = gimple_default_def (id.src_cfun, DECL_RESULT (old_decl))))
+	{
+	  tree new_name = make_ssa_name (DECL_RESULT (new_decl), NULL);
+	  insert_decl_map (&id, old_name, new_name);
+	  SSA_NAME_DEF_STMT (new_name) = gimple_build_nop ();
+	  set_default_def (DECL_RESULT (new_decl), new_name);
+	}
+    }
+
+  /* Copy the Function's body.  */
+  copy_body (&id, old_entry_block->count, REG_BR_PROB_BASE,
+	     ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, blocks_to_copy, new_entry);
+
+  /* Renumber the lexical scoping (non-code) blocks consecutively.  */
+  number_blocks (new_decl);
+
+  /* We want to create the BB unconditionally, so that the addition of
+     debug stmts doesn't affect BB count, which may in the end cause
+     codegen differences.  */
+  bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR));
+  while (VEC_length (gimple, init_stmts))
+    insert_init_stmt (&id, bb, VEC_pop (gimple, init_stmts));
+  update_clone_info (&id);
+
+  /* Remap the nonlocal_goto_save_area, if any.  */
+  if (cfun->nonlocal_goto_save_area)
+    {
+      struct walk_stmt_info wi;
+
+      memset (&wi, 0, sizeof (wi));
+      wi.info = &id;
+      walk_tree (&cfun->nonlocal_goto_save_area, remap_gimple_op_r, &wi, NULL);
+    }
+
+  /* Clean up.  */
+  pointer_map_destroy (id.decl_map);
+  if (id.debug_map)
+    pointer_map_destroy (id.debug_map);
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+
+  fold_marked_statements (0, id.statements_to_fold);
+  pointer_set_destroy (id.statements_to_fold);
+  fold_cond_expr_cond ();
+  delete_unreachable_blocks_update_callgraph (&id);
+  if (id.dst_node->analyzed)
+    cgraph_rebuild_references ();
+  update_ssa (TODO_update_ssa);
+
+  /* After partial cloning we need to rescale frequencies, so they are
+     within proper range in the cloned function.  */
+  if (new_entry)
+    {
+      struct cgraph_edge *e;
+      rebuild_frequencies ();
+
+      new_version_node->count = ENTRY_BLOCK_PTR->count;
+      for (e = new_version_node->callees; e; e = e->next_callee)
+	{
+	  basic_block bb = gimple_bb (e->call_stmt);
+	  e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
+							 bb);
+	  e->count = bb->count;
+	}
+      for (e = new_version_node->indirect_calls; e; e = e->next_callee)
+	{
+	  basic_block bb = gimple_bb (e->call_stmt);
+	  e->frequency = compute_call_stmt_bb_frequency (current_function_decl,
+							 bb);
+	  e->count = bb->count;
+	}
+    }
+
+  free_dominance_info (CDI_DOMINATORS);
+  free_dominance_info (CDI_POST_DOMINATORS);
+
+  gcc_assert (!id.debug_stmts);
+  VEC_free (gimple, heap, init_stmts);
+  pop_cfun ();
+  current_function_decl = old_current_function_decl;
+  gcc_assert (!current_function_decl
+	      || DECL_STRUCT_FUNCTION (current_function_decl) == cfun);
+  return;
+}
+
+
 /* EXP is CALL_EXPR present in a GENERIC expression tree.  Try to integrate
    the callee and return the inlined body on success.  */
 

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2012-04-05 22:37 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-04-05 22:37 [PATCH][Cilkplus] Elemental function cloning with vector params Iyer, Balaji V

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).