From 90cb91ca75ce29e560184fbd1ca03a7e58fc6685 Mon Sep 17 00:00:00 2001 From: marxin Date: Thu, 26 Nov 2015 10:18:44 +0100 Subject: [PATCH 1/5] HSA: implement omp_get_level libgomp/ChangeLog: 2015-11-26 Martin Liska * plugin/plugin-hsa.c (struct hsa_kernel_description): Add field gridified_kernel_p. (struct kernel_info): Likewise. (GOMP_OFFLOAD_load_image): Fill in the field. (init_single_kernel): Dump value of the field. (create_kernel_dispatch): Set up omp_level for kernel packet dispatch structure. gcc/ChangeLog: 2015-11-26 Martin Liska * hsa-brig.c (hsa_output_kernels): Append gridified_kernel_p to kernel_info structure. * hsa-gen.c (gen_get_level): Generate call of the builtin. (gen_hsa_insns_for_known_library_call): Call the aforementioned function. (generate_hsa): Output gridified_kernel_p from HSA summary. * hsa.c (struct hsa_decl_kernel_map_element): Add gridified_kernel_p field. (hsa_add_kern_decl_mapping): Add argument for the field. (hsa_get_decl_kernel_mapping_gridified): New function. (hsa_summary_t::link_functions): Add new argument for gridified_kernel_p. (hsa_register_kernel): Mark gridified kernel within HSA summary. * hsa.h (struct hsa_function_summary): Declare new field in HSA summary. * ipa-hsa.c (process_hsa_functions): Use modified signature of link_functions. include/ChangeLog: 2015-11-26 Martin Liska * gomp-constants.h (struct GOMP_hsa_kernel_dispatch): Declare new field in kernel dispatch structure. 
--- gcc/hsa-brig.c | 27 ++++++++++++++++++--------- gcc/hsa-gen.c | 39 ++++++++++++++++++++++++++++++++++++++- gcc/hsa.c | 25 ++++++++++++++++++------- gcc/hsa.h | 11 ++++++++--- gcc/ipa-hsa.c | 4 ++-- include/gomp-constants.h | 2 ++ libgomp/plugin/plugin-hsa.c | 7 +++++++ 7 files changed, 93 insertions(+), 22 deletions(-) diff --git a/gcc/hsa-brig.c b/gcc/hsa-brig.c index ca30598..9f65d50 100644 --- a/gcc/hsa-brig.c +++ b/gcc/hsa-brig.c @@ -1982,15 +1982,19 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) unsigned_type_node); DECL_CHAIN (id_f2) = id_f1; tree id_f3 = build_decl (BUILTINS_LOCATION, FIELD_DECL, - get_identifier ("kernel_dependencies_count"), - unsigned_type_node); + get_identifier ("gridified_kernel_p"), + boolean_type_node); DECL_CHAIN (id_f3) = id_f2; tree id_f4 = build_decl (BUILTINS_LOCATION, FIELD_DECL, + get_identifier ("kernel_dependencies_count"), + unsigned_type_node); + DECL_CHAIN (id_f4) = id_f3; + tree id_f5 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("kernel_dependencies"), build_pointer_type (build_pointer_type (char_type_node))); - DECL_CHAIN (id_f4) = id_f3; - finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f4, + DECL_CHAIN (id_f5) = id_f4; + finish_builtin_struct (kernel_info_type, "__hsa_kernel_info", id_f5, NULL_TREE); int_num_of_kernels = build_int_cstu (uint32_type_node, map_count); @@ -2018,7 +2022,10 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) free (copy); unsigned omp_size = hsa_get_decl_kernel_mapping_omp_size (i); - tree omp_data_size = build_int_cstu (uint32_type_node, omp_size); + tree omp_data_size = build_int_cstu (unsigned_type_node, omp_size); + bool gridified_kernel_p = hsa_get_decl_kernel_mapping_gridified (i); + tree gridified_kernel_p_tree = build_int_cstu (boolean_type_node, + gridified_kernel_p); unsigned count = 0; kernel_dependencies_vector_type = build_array_type @@ -2057,7 +2064,7 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) } } - 
tree dependencies_count = build_int_cstu (uint32_type_node, count); + tree dependencies_count = build_int_cstu (unsigned_type_node, count); vec *kernel_info_vec = NULL; CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, @@ -2066,11 +2073,10 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) (kern_name)), kern_name)); CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, omp_data_size); + CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, + gridified_kernel_p_tree); CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, dependencies_count); - tree kernel_info_ctor = build_constructor (kernel_info_type, - kernel_info_vec); - if (count > 0) { ASM_GENERATE_INTERNAL_LABEL (tmp_name, "__hsa_dependencies_list", i); @@ -2098,6 +2104,9 @@ hsa_output_kernels (tree *host_func_table, tree *kernels) else CONSTRUCTOR_APPEND_ELT (kernel_info_vec, NULL_TREE, null_pointer_node); + tree kernel_info_ctor = build_constructor (kernel_info_type, + kernel_info_vec); + CONSTRUCTOR_APPEND_ELT (kernel_info_vector_vec, NULL_TREE, kernel_info_ctor); } diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c index 0df1eb6..5993ed5 100644 --- a/gcc/hsa-gen.c +++ b/gcc/hsa-gen.c @@ -3594,6 +3594,38 @@ gen_get_team_num (gimple *stmt, hsa_bb *hbb) hbb->append_insn (basic); } +/* Emit instructions that get levels-var ICV to lhs of gimple STMT. + Instructions are appended to basic block HBB. 
*/ + +static void +gen_get_level (gimple *stmt, hsa_bb *hbb) +{ + if (gimple_call_lhs (stmt) == NULL_TREE) + return; + + hbb->append_insn (new hsa_insn_comment ("omp_get_level")); + + tree lhs = gimple_call_lhs (stmt); + hsa_op_reg *dest = hsa_cfun->reg_for_gimple_ssa (lhs); + + hsa_op_reg *shadow_reg_ptr = hsa_cfun->get_shadow_reg (); + if (shadow_reg_ptr == NULL) + { + HSA_SORRY_AT (gimple_location (stmt), + "support for HSA does not implement omp_get_level called " + "from a function not being inlined within a kernel"); + return; + } + + hsa_op_address *addr = new hsa_op_address + (shadow_reg_ptr, offsetof (GOMP_hsa_kernel_dispatch, omp_level)); + + hsa_insn_mem *mem = new hsa_insn_mem (BRIG_OPCODE_LD, BRIG_TYPE_U64, NULL, + addr); + hbb->append_insn (mem); + mem->set_output_in_type (dest, 0, hbb); +} + /* Emit instructions that implement alloca builtin gimple STMT. Instructions are appended to basic block HBB. */ @@ -3687,6 +3719,8 @@ gen_hsa_insns_for_known_library_call (gimple *stmt, hsa_bb *hbb) gen_get_num_teams (stmt, hbb); else if (strcmp (name, "omp_get_team_num") == 0) gen_get_team_num (stmt, hbb); + else if (strcmp (name, "omp_get_level") == 0) + gen_get_level (stmt, hbb); else if (strcmp (name, "hsa_set_debug_value") == 0) { if (hsa_cfun->has_shadow_reg_p ()) @@ -5576,8 +5610,11 @@ generate_hsa (bool kernel) if (hsa_cfun->m_kern_p) { + hsa_function_summary *s = hsa_summaries->get + (cgraph_node::get (hsa_cfun->m_decl)); hsa_add_kern_decl_mapping (current_function_decl, hsa_cfun->m_name, - hsa_cfun->m_maximum_omp_data_size); + hsa_cfun->m_maximum_omp_data_size, + s->m_gridified_kernel_p); } #ifdef ENABLE_CHECKING diff --git a/gcc/hsa.c b/gcc/hsa.c index 8ab5da7..7c4e404 100644 --- a/gcc/hsa.c +++ b/gcc/hsa.c @@ -52,6 +52,8 @@ struct GTY(()) hsa_decl_kernel_map_element char * GTY((skip)) name; /* Size of OMP data, if the kernel contains a kernel dispatch. */ unsigned omp_data_size; + /* True if the function is gridified kernel. 
*/ + bool gridified_kernel_p; }; /* Mapping between decls and corresponding HSA kernels in this compilation @@ -584,12 +586,14 @@ hsa_destroy_operand (hsa_op_base *op) /* Create a mapping between the original function DECL and kernel name NAME. */ void -hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size) +hsa_add_kern_decl_mapping (tree decl, char *name, unsigned omp_data_size, + bool gridified_kernel_p) { hsa_decl_kernel_map_element dkm; dkm.decl = decl; dkm.name = name; dkm.omp_data_size = omp_data_size; + dkm.gridified_kernel_p = gridified_kernel_p; vec_safe_push (hsa_decl_kernel_mapping, dkm); } @@ -625,6 +629,14 @@ hsa_get_decl_kernel_mapping_omp_size (unsigned i) return (*hsa_decl_kernel_mapping)[i].omp_data_size; } +/* Return if the function is gridified kernel in decl name mapping. */ + +bool +hsa_get_decl_kernel_mapping_gridified (unsigned i) +{ + return (*hsa_decl_kernel_mapping)[i].gridified_kernel_p; +} + /* Free the mapping between original decls and kernel names. */ void @@ -708,13 +720,9 @@ hsa_get_declaration_name (tree decl) return NULL; } -/* Couple GPU and HOST as gpu-specific and host-specific implementation of the - same function. KIND determines whether GPU is a host-invokable kernel or - gpu-callable function. 
*/ - void hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, - hsa_function_kind kind) + hsa_function_kind kind, bool gridified_kernel_p) { hsa_function_summary *gpu_summary = get (gpu); hsa_function_summary *host_summary = get (host); @@ -725,6 +733,9 @@ hsa_summary_t::link_functions (cgraph_node *gpu, cgraph_node *host, gpu_summary->m_gpu_implementation_p = true; host_summary->m_gpu_implementation_p = false; + gpu_summary->m_gridified_kernel_p = gridified_kernel_p; + host_summary->m_gridified_kernel_p = gridified_kernel_p; + gpu_summary->m_binded_function = host; host_summary->m_binded_function = gpu; @@ -761,7 +772,7 @@ hsa_register_kernel (cgraph_node *gpu, cgraph_node *host) { if (hsa_summaries == NULL) hsa_summaries = new hsa_summary_t (symtab); - hsa_summaries->link_functions (gpu, host, HSA_KERNEL); + hsa_summaries->link_functions (gpu, host, HSA_KERNEL, true); } /* Return true if expansion of the current HSA function has already failed. */ diff --git a/gcc/hsa.h b/gcc/hsa.h index dc2202a..a59a725 100644 --- a/gcc/hsa.h +++ b/gcc/hsa.h @@ -1151,6 +1151,9 @@ struct hsa_function_summary /* Identifies if the function is an HSA function or a host function. */ bool m_gpu_implementation_p; + + /* True if the function is a gridified kernel. */ + bool m_gridified_kernel_p; }; inline @@ -1168,10 +1171,11 @@ public: /* Couple GPU and HOST as gpu-specific and host-specific implementation of the same function. KIND determines whether GPU is a host-invokable kernel - or gpu-callable function. */ + or gpu-callable function and GRIDIFIED_KERNEL_P is set if the function was + gridified in OMP. 
*/ void link_functions (cgraph_node *gpu, cgraph_node *host, - hsa_function_kind kind); + hsa_function_kind kind, bool gridified_kernel_p); }; /* in hsa.c */ @@ -1200,11 +1204,12 @@ BrigAlignment8_t hsa_alignment_encoding (unsigned n); BrigAlignment8_t hsa_natural_alignment (BrigType16_t type); void hsa_destroy_operand (hsa_op_base *op); void hsa_destroy_insn (hsa_insn_basic *insn); -void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned); +void hsa_add_kern_decl_mapping (tree decl, char *name, unsigned, bool); unsigned hsa_get_number_decl_kernel_mappings (void); tree hsa_get_decl_kernel_mapping_decl (unsigned i); char *hsa_get_decl_kernel_mapping_name (unsigned i); unsigned hsa_get_decl_kernel_mapping_omp_size (unsigned i); +bool hsa_get_decl_kernel_mapping_gridified (unsigned i); void hsa_free_decl_kernel_mapping (void); void hsa_add_kernel_dependency (tree caller, const char *called_function); void hsa_sanitize_name (char *p); diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c index d77fa6b..a497841 100644 --- a/gcc/ipa-hsa.c +++ b/gcc/ipa-hsa.c @@ -92,7 +92,7 @@ process_hsa_functions (void) TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl); clone->force_output = true; - hsa_summaries->link_functions (clone, node, s->m_kind); + hsa_summaries->link_functions (clone, node, s->m_kind, false); if (dump_file) fprintf (dump_file, "Created a new HSA clone: %s, type: %s\n", @@ -109,7 +109,7 @@ process_hsa_functions (void) if (!cgraph_local_p (node)) clone->force_output = true; - hsa_summaries->link_functions (clone, node, HSA_FUNCTION); + hsa_summaries->link_functions (clone, node, HSA_FUNCTION, false); if (dump_file) fprintf (dump_file, "Created a new HSA function clone: %s\n", diff --git a/include/gomp-constants.h b/include/gomp-constants.h index b710eb1..1dae474 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -294,6 +294,8 @@ struct GOMP_hsa_kernel_dispatch uint32_t omp_num_threads; /* Debug purpose argument. 
*/ uint64_t debug; + /* Levels-var ICV. */ + uint64_t omp_level; /* Kernel dispatch structures created for children kernel dispatches. */ struct GOMP_hsa_kernel_dispatch **children_dispatches; }; diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c index 39e2366..b132954 100644 --- a/libgomp/plugin/plugin-hsa.c +++ b/libgomp/plugin/plugin-hsa.c @@ -157,6 +157,7 @@ struct hsa_kernel_description { const char *name; unsigned omp_data_size; + bool gridified_kernel_p; unsigned kernel_dependencies_count; const char **kernel_dependencies; }; @@ -217,6 +218,8 @@ struct kernel_info unsigned dependencies_count; /* Maximum OMP data size necessary for kernel from kernel dispatches. */ unsigned max_omp_data_size; + /* True if the kernel is gridified. */ + bool gridified_kernel_p; }; /* Information about a particular brig module, its image and kernels. */ @@ -619,6 +622,7 @@ GOMP_OFFLOAD_load_image (int ord, unsigned version, void *target_data, kernel->module = module; kernel->name = d->name; kernel->omp_data_size = d->omp_data_size; + kernel->gridified_kernel_p = d->gridified_kernel_p; kernel->dependencies_count = d->kernel_dependencies_count; kernel->dependencies = d->kernel_dependencies; if (pthread_mutex_init (&kernel->init_mutex, NULL)) @@ -916,6 +920,7 @@ init_single_kernel (struct kernel_info *kernel, unsigned *max_omp_data_size) HSA_DEBUG (" kernarg_segment_size: %u\n", (unsigned) kernel->kernarg_segment_size); HSA_DEBUG (" omp_data_size: %u\n", kernel->omp_data_size); + HSA_DEBUG (" gridified_kernel_p: %u\n", kernel->gridified_kernel_p); if (kernel->omp_data_size > *max_omp_data_size) *max_omp_data_size = kernel->omp_data_size; @@ -1003,6 +1008,7 @@ create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) (kernel, omp_data_size); shadow->omp_num_threads = 64; shadow->debug = 0; + shadow->omp_level = kernel->gridified_kernel_p ? 1 : 0; /* Create kernel dispatch data structures. 
We do not allow to have a kernel dispatch with depth bigger than one. */ @@ -1014,6 +1020,7 @@ create_kernel_dispatch (struct kernel_info *kernel, unsigned omp_data_size) (dependency, omp_data_size); shadow->children_dispatches[i]->queue = kernel->agent->kernel_dispatch_command_q; + shadow->children_dispatches[i]->omp_level = 1; } return shadow; -- 2.6.3