Hi Jakub, This patch was reduced a bit and most of your comments were considered in the last submission of the environment variable syntax extension patch (https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html). This patch also builds on that envvar patch version. The nteams-var related content was moved from this patch to the envvar patch as that is closely connected. However, additional testing and testing of copy back device-specific nteams-var ICV values is still included in this patch together with the teams-thread-limit-var content. >> --- a/gcc/gimplify.cc >> +++ b/gcc/gimplify.cc >> @@ -13994,7 +13994,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p) >> struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp; >> >> if (teams == NULL_TREE) >> - num_teams_upper = integer_one_node; >> + num_teams_upper = integer_minus_two_node; > > No, please don't introduce this, it is quite costly to have a GC trees > like integer_one_node, so they should stay for the most commonly used > numbers, -2 isn't like that. Just build_int_cst (integer_type_node, -2). integer_minus_two_node was replaced by "build_int_cst (integer_type_node, -2)". > >> --- a/gcc/tree-core.h >> +++ b/gcc/tree-core.h >> @@ -642,6 +642,7 @@ enum tree_index { >> TI_INTEGER_ONE, >> TI_INTEGER_THREE, >> TI_INTEGER_MINUS_ONE, >> + TI_INTEGER_MINUS_TWO, >> TI_NULL_POINTER, >> >> TI_SIZE_ZERO, >> diff --git a/gcc/tree.cc b/gcc/tree.cc >> index 8f83ea1..8cb474d 100644 >> --- a/gcc/tree.cc >> +++ b/gcc/tree.cc >> @@ -9345,6 +9345,7 @@ build_common_tree_nodes (bool signed_char) >> integer_one_node = build_int_cst (integer_type_node, 1); >> integer_three_node = build_int_cst (integer_type_node, 3); >> integer_minus_one_node = build_int_cst (integer_type_node, -1); >> + integer_minus_two_node = build_int_cst (integer_type_node, -2); >> >> size_zero_node = size_int (0); >> size_one_node = size_int (1); >> diff --git a/gcc/tree.h b/gcc/tree.h >> index cea49a5..1aeb009 100644 >> --- a/gcc/tree.h >> +++ b/gcc/tree.h >> @@ -4206,6 +4206,7 @@ tree_strip_any_location_wrapper (tree exp) >> #define integer_one_node global_trees[TI_INTEGER_ONE] >> #define integer_three_node global_trees[TI_INTEGER_THREE] >> #define integer_minus_one_node global_trees[TI_INTEGER_MINUS_ONE] >> +#define integer_minus_two_node global_trees[TI_INTEGER_MINUS_TWO] >> #define size_zero_node global_trees[TI_SIZE_ZERO] >> #define size_one_node global_trees[TI_SIZE_ONE] >> #define bitsize_zero_node global_trees[TI_BITSIZE_ZERO] > > And drop the above 3 hunks. Removed. > >> --- a/libgomp/config/gcn/icv-device.c >> +++ b/libgomp/config/gcn/icv-device.c >> @@ -37,6 +37,7 @@ volatile int GOMP_DEFAULT_DEVICE_VAR; >> volatile int GOMP_MAX_ACTIVE_LEVELS_VAR; >> volatile omp_proc_bind_t GOMP_BIND_VAR; >> volatile int GOMP_NTEAMS_VAR; >> +volatile int GOMP_TEAMS_THREAD_LIMIT_VAR; > > I really don't like this copying of individual ICVs one by one to the > device, copy a struct containing them and access fields in that struct. I recently changed this in https://gcc.gnu.org/pipermail/gcc-patches/2022-August/599175.html. So there is one struct containing all ICVs that are copied from host to the device and back. > >> --- a/libgomp/libgomp-plugin.h >> +++ b/libgomp/libgomp-plugin.h >> @@ -116,6 +116,7 @@ struct addr_pair >> #define GOMP_MAX_ACTIVE_LEVELS_VAR __gomp_max_active_levels >> #define GOMP_BIND_VAR __gomp_bind >> #define GOMP_NTEAMS_VAR __gomp_nteams >> +#define GOMP_TEAMS_THREAD_LIMIT_VAR __gomp_teams_thread_limit_var > > Likewise here. Those were all removed. > >> @@ -527,13 +538,19 @@ struct gomp_icv_list { >> >> extern void *gomp_get_icv_value_ptr (struct gomp_icv_list **list, >> int device_num); >> -extern struct gomp_icv_list *gomp_run_sched_var_dev_list; >> -extern struct gomp_icv_list *gomp_run_sched_chunk_size_dev_list; >> +extern struct gomp_icv_list* gomp_add_device_specific_icv (int dev_num, >> + size_t size, >> + struct gomp_icv_list **list); >> +extern struct gomp_icv_list *gomp_initial_run_sched_var_dev_list; >> +extern struct gomp_icv_list *gomp_initial_run_sched_chunk_size_dev_list; >> +extern struct gomp_icv_list *gomp_initial_max_active_levels_var_dev_list; >> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_dev_list; >> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_dev_list; >> +extern struct gomp_icv_list *gomp_initial_proc_bind_var_list_len_dev_list; >> +extern struct gomp_icv_list *gomp_initial_nteams_var_dev_list; >> + >> extern struct gomp_icv_list *gomp_nteams_var_dev_list; >> -extern struct gomp_icv_list *gomp_max_active_levels_var_dev_list; >> -extern struct gomp_icv_list *gomp_proc_bind_var_dev_list; >> -extern struct gomp_icv_list *gomp_proc_bind_var_list_dev_list; >> -extern struct gomp_icv_list *gomp_proc_bind_var_list_len_dev_list; >> +extern struct gomp_icv_list *gomp_teams_thread_limit_var_dev_list; > > Nor these per-var lists. For a specific device, walk the list with > all the vars in it, start with the most specific (matching dev number), > then just dev and then all and fill in from it what is going to be copied. The above lists were removed and instead one list for device-specific ICV structs was introduced in the above mentioned patch. >> --- a/libgomp/plugin/plugin-gcn.c >> +++ b/libgomp/plugin/plugin-gcn.c >> @@ -572,7 +572,8 @@ static char *GOMP_ICV_STRINGS[] = >> XSTRING (GOMP_DYN_VAR), >> XSTRING (GOMP_MAX_ACTIVE_LEVELS_VAR), >> XSTRING (GOMP_BIND_VAR), >> - XSTRING (GOMP_NTEAMS_VAR) >> + XSTRING (GOMP_NTEAMS_VAR), >> + XSTRING (GOMP_TEAMS_THREAD_LIMIT_VAR) > > Then you don't need to e.g. track the names of the individual vars, just > one for the whole ICV block. That array was also removed. The patch was tested on x86_64-linux with nvptx and amdgcn offloading without regression. Marcel ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955