This patch adds support for omp_get_max_teams, omp_set_num_teams, and omp_{gs}et_teams_thread_limit on offload devices. That includes the usage of device-specific ICV values (specified as environment variables or changed on a device). In order to reuse device-specific ICV values, a copy back mechanism is implemented that copies ICV values back from device to the host. gcc/ChangeLog: * gimplify.cc (optimize_target_teams): Set initial num_teams_upper to "-2" instead of "1" for non-existing num_teams clause in order to disambiguate from the case of an existing num_teams clause with value 1. libgomp/ChangeLog: * config/gcn/icv-device.c (omp_get_teams_thread_limit): Added to allow processing of device-specific values. (omp_set_teams_thread_limit): Likewise. (ialias): Likewise. * config/nvptx/icv-device.c (omp_get_teams_thread_limit): Likewise. (omp_set_teams_thread_limit): Likewise. (ialias): Likewise. * icv-device.c (omp_get_teams_thread_limit): Likewise. (ialias): Likewise. (omp_set_teams_thread_limit): Likewise. * icv.c (omp_set_teams_thread_limit): Removed. (omp_get_teams_thread_limit): Likewise. (ialias): Likewise. * target.c (get_gomp_offload_icvs): Added teams_thread_limit_var handling. (gomp_load_image_to_device): Added a size check for the ICVs struct variable. (gomp_copy_back_icvs): New function that is used in GOMP_target_ext to copy back the ICV values from device to host. (GOMP_target_ext): Update the number of teams and threads in the kernel args also considering device-specific values. * testsuite/libgomp.c-c++-common/icv-4.c: Bugfix. * testsuite/libgomp.c-c++-common/icv-5.c: Extended. * testsuite/libgomp.c-c++-common/icv-6.c: Extended. * testsuite/libgomp.c-c++-common/icv-7.c: Extended. * testsuite/libgomp.c-c++-common/icv-9.c: New test. * testsuite/libgomp.fortran/icv-5.f90: New test. * testsuite/libgomp.fortran/icv-6.f90: New test. gcc/testsuite/ChangeLog: * c-c++-common/gomp/target-teams-1.c: Adapt expected values for num_teams from "1" to "-2" in cases without num_teams clause. * g++.dg/gomp/target-teams-1.C: Likewise. * gfortran.dg/gomp/defaultmap-4.f90: Likewise. * gfortran.dg/gomp/defaultmap-5.f90: Likewise. * gfortran.dg/gomp/defaultmap-6.f90: Likewise. diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index dcdc852..b393ed8 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -14153,7 +14153,7 @@ optimize_target_teams (tree target, gimple_seq *pre_p) struct gimplify_omp_ctx *target_ctx = gimplify_omp_ctxp; if (teams == NULL_TREE) - num_teams_upper = integer_one_node; + num_teams_upper = build_int_cst (integer_type_node, -2); else for (c = OMP_TEAMS_CLAUSES (teams); c; c = OMP_CLAUSE_CHAIN (c)) { diff --git a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c index 51b8d48..74d60e1 100644 --- a/gcc/testsuite/c-c++-common/gomp/target-teams-1.c +++ b/gcc/testsuite/c-c++-common/gomp/target-teams-1.c @@ -81,5 +81,5 @@ foo (int a, int b, long c, long d) /* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */ /* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */ /* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */ -/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */ /* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */ diff --git a/gcc/testsuite/g++.dg/gomp/target-teams-1.C b/gcc/testsuite/g++.dg/gomp/target-teams-1.C index f78a608..29e5597 100644 --- a/gcc/testsuite/g++.dg/gomp/target-teams-1.C +++ b/gcc/testsuite/g++.dg/gomp/target-teams-1.C @@ -88,5 +88,5 @@ foo (int a, int b, long c, long d) /* { dg-final { scan-tree-dump-times "thread_limit\\(-1\\)" 3 "gimple" } } */ /* { dg-final { scan-tree-dump-times "num_teams\\(0\\)" 4 "gimple" } } */ /* { dg-final { scan-tree-dump-times "thread_limit\\(0\\)" 6 "gimple" } } */ -/* { dg-final { scan-tree-dump-times "num_teams\\(1\\)" 2 "gimple" } } */ +/* { dg-final { scan-tree-dump-times "num_teams\\(-2\\)" 2 "gimple" } } */ /* { dg-final { scan-tree-dump-times "thread_limit\\(1\\)" 0 "gimple" } } */ diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90 b/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90 index 7b182b5..9081159 100644 --- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-4.f90 @@ -141,5 +141,5 @@ end ! { dg-final { scan-tree-dump-times "map\\(to:\\.strxparr \\\[len:" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 2 "gimple" } } -! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(alloc\\)" 1 "gimple" } } -! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(alloc:scalar\\) defaultmap\\(to:aggregate\\) defaultmap\\(tofrom:allocatable\\) defaultmap\\(firstprivate:pointer\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(alloc\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(alloc:scalar\\) defaultmap\\(to:aggregate\\) defaultmap\\(tofrom:allocatable\\) defaultmap\\(firstprivate:pointer\\)" 1 "gimple" } } diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90 b/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90 index 1391274..91566ed 100644 --- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-5.f90 @@ -141,5 +141,5 @@ end ! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:" 2 "gimple" } } ! { dg-final { scan-tree-dump-times "map\\(to:\\*strxp \\\[len:" 1 "gimple" } } ! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 1 "gimple" } } -! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(to\\)" 1 "gimple" } } -! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(to:scalar\\) defaultmap\\(tofrom:aggregate\\) defaultmap\\(firstprivate:allocatable\\) defaultmap\\(default:pointer\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(to\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(to:scalar\\) defaultmap\\(tofrom:aggregate\\) defaultmap\\(firstprivate:allocatable\\) defaultmap\\(default:pointer\\)" 1 "gimple" } } diff --git a/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90 b/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90 index 9a81d0f..867e41a 100644 --- a/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90 +++ b/gcc/testsuite/gfortran.dg/gomp/defaultmap-6.f90 @@ -101,4 +101,4 @@ end ! { dg-final { scan-tree-dump-times "map\\(to:\\.strxparr \\\[len:" 1 "gimple" } } ! { dg-final { scan-tree-dump-times "map\\(to:strxparr \\\[pointer set, len:" 1 "gimple" } } ! { dg-final { scan-tree-dump-times "map\\(to:\\.strxp \\\[len:" 1 "gimple" } } -! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(1\\) thread_limit\\(0\\) defaultmap\\(default\\)" 1 "gimple" } } +! { dg-final { scan-tree-dump-times "#pragma omp target num_teams\\(-2\\) thread_limit\\(0\\) defaultmap\\(default\\)" 1 "gimple" } } diff --git a/libgomp/config/gcn/icv-device.c b/libgomp/config/gcn/icv-device.c index bf757ba..eb68881 100644 --- a/libgomp/config/gcn/icv-device.c +++ b/libgomp/config/gcn/icv-device.c @@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams) GOMP_ADDITIONAL_ICVS.nteams = num_teams; } +int +omp_get_teams_thread_limit (void) +{ + return GOMP_ADDITIONAL_ICVS.teams_thread_limit; +} + +void +omp_set_teams_thread_limit (int thread_limit) +{ + if (thread_limit >= 0) + GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) @@ -89,3 +102,5 @@ ialias (omp_is_initial_device) ialias (omp_get_device_num) ialias (omp_get_max_teams) ialias (omp_set_num_teams) +ialias (omp_get_teams_thread_limit) +ialias (omp_set_teams_thread_limit) diff --git a/libgomp/config/nvptx/icv-device.c b/libgomp/config/nvptx/icv-device.c index 6f869be..a3f00cf 100644 --- a/libgomp/config/nvptx/icv-device.c +++ b/libgomp/config/nvptx/icv-device.c @@ -81,6 +81,19 @@ omp_set_num_teams (int num_teams) GOMP_ADDITIONAL_ICVS.nteams = num_teams; } +int +omp_get_teams_thread_limit (void) +{ + return GOMP_ADDITIONAL_ICVS.teams_thread_limit; +} + +void +omp_set_teams_thread_limit (int thread_limit) +{ + if (thread_limit >= 0) + GOMP_ADDITIONAL_ICVS.teams_thread_limit = thread_limit; +} + ialias (omp_set_default_device) ialias (omp_get_default_device) ialias (omp_get_initial_device) @@ -89,3 +102,5 @@ ialias (omp_is_initial_device) ialias (omp_get_device_num) ialias (omp_get_max_teams) ialias (omp_set_num_teams) +ialias (omp_get_teams_thread_limit) +ialias (omp_set_teams_thread_limit) diff --git a/libgomp/icv-device.c b/libgomp/icv-device.c index d8acf0e..48607ce 100644 --- a/libgomp/icv-device.c +++ b/libgomp/icv-device.c @@ -97,3 +97,20 @@ omp_set_num_teams (int num_teams) } ialias (omp_set_num_teams) + +int +omp_get_teams_thread_limit (void) +{ + return gomp_teams_thread_limit_var; +} + +ialias (omp_get_teams_thread_limit) + +void +omp_set_teams_thread_limit (int thread_limit) +{ + if (thread_limit >= 0) + gomp_teams_thread_limit_var = thread_limit; +} + +ialias (omp_set_teams_thread_limit) diff --git a/libgomp/icv.c b/libgomp/icv.c index df423c0..9aef91c 100644 --- a/libgomp/icv.c +++ b/libgomp/icv.c @@ -148,19 +148,6 @@ omp_get_supported_active_levels (void) return gomp_supported_active_levels; } -void -omp_set_teams_thread_limit (int thread_limit) -{ - if (thread_limit >= 0) - gomp_teams_thread_limit_var = thread_limit; -} - -int -omp_get_teams_thread_limit (void) -{ - return gomp_teams_thread_limit_var; -} - int omp_get_cancellation (void) { @@ -261,8 +248,6 @@ ialias (omp_get_thread_limit) ialias (omp_set_max_active_levels) ialias (omp_get_max_active_levels) ialias (omp_get_supported_active_levels) -ialias (omp_set_teams_thread_limit) -ialias (omp_get_teams_thread_limit) ialias (omp_get_cancellation) ialias (omp_get_proc_bind) ialias (omp_get_max_task_priority) diff --git a/libgomp/target.c b/libgomp/target.c index 5763483..193c828 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -2155,6 +2155,19 @@ get_gomp_offload_icvs (int dev_num) new->icvs.nteams = gomp_default_icv_values.nteams_var; if (dev_x != NULL + && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_TEAMS_THREAD_LIMIT)) + new->icvs.teams_thread_limit = dev_x->icvs.teams_thread_limit_var; + else if (dev != NULL + && gomp_get_icv_flag (dev->flags, GOMP_ICV_TEAMS_THREAD_LIMIT)) + new->icvs.teams_thread_limit = dev->icvs.teams_thread_limit_var; + else if (all != NULL + && gomp_get_icv_flag (all->flags, GOMP_ICV_TEAMS_THREAD_LIMIT)) + new->icvs.teams_thread_limit = all->icvs.teams_thread_limit_var; + else + new->icvs.teams_thread_limit + = gomp_default_icv_values.teams_thread_limit_var; + + if (dev_x != NULL && gomp_get_icv_flag (dev_x->flags, GOMP_ICV_DEFAULT_DEVICE)) new->icvs.default_device = dev_x->icvs.default_device_var; else if (dev != NULL @@ -2290,7 +2303,14 @@ gomp_load_image_to_device (struct gomp_device_descr *devicep, unsigned version, int dev_num = (int) (devicep - &devices[0]); struct gomp_offload_icvs *icvs = get_gomp_offload_icvs (dev_num); size_t var_size = var->end - var->start; - + if (var_size != sizeof (struct gomp_offload_icvs)) + { + gomp_mutex_unlock (&devicep->lock); + if (is_register_lock) + gomp_mutex_unlock (®ister_lock); + gomp_fatal ("offload plugin managed 'icv struct' not of expected " + "format"); + } /* Copy the ICVs variable to place on device memory, hereby actually designating its device number into effect. */ gomp_copy_host2dev (devicep, NULL, (void *) var->start, icvs, @@ -2769,6 +2789,20 @@ clear_unsupported_flags (struct gomp_device_descr *devicep, unsigned int flags) return flags; } +static void +gomp_copy_back_icvs (struct gomp_device_descr *devicep, int device) +{ + struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); + if (item == NULL) + return; + + void *host_ptr = &item->icvs; + void *dev_ptr = omp_get_mapped_ptr (host_ptr, device); + if (dev_ptr != NULL) + gomp_copy_dev2host (devicep, NULL, host_ptr, dev_ptr, + sizeof (struct gomp_offload_icvs)); +} + /* Like GOMP_target, but KINDS is 16-bit, UNUSED is no longer present, and several arguments have been added: FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. @@ -2801,6 +2835,146 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, size_t tgt_align = 0, tgt_size = 0; bool fpc_done = false; + /* Obtain the original TEAMS and THREADS values from ARGS. */ + intptr_t orig_teams = 1, orig_threads = 0; + size_t num_args = 0, len = 1, teams_len = 1, threads_len = 1; + void **tmpargs = args; + while (*tmpargs) + { + intptr_t id = (intptr_t) *tmpargs++, val; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + { + val = (intptr_t) *tmpargs++; + len = 2; + } + else + { + val = id >> GOMP_TARGET_ARG_VALUE_SHIFT; + len = 1; + } + num_args += len; + if ((id & GOMP_TARGET_ARG_DEVICE_MASK) != GOMP_TARGET_ARG_DEVICE_ALL) + continue; + val = val > INT_MAX ? INT_MAX : val; + if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS) + { + orig_teams = val; + teams_len = len; + } + else if ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT) + { + orig_threads = val; + threads_len = len; + } + } + + intptr_t new_teams = orig_teams, new_threads = orig_threads; + /* ORIG_TEAMS == -2: No explicit teams construct specified. Set to 1. + ORIG_TEAMS == -1: TEAMS construct with NUM_TEAMS clause specified, but the + value could not be specified. No Change. + ORIG_TEAMS == 0: TEAMS construct without NUM_TEAMS clause. + Set device-specific value. + ORIG_TEAMS > 0: Value was already set through e.g. NUM_TEAMS clause. + No change. */ + if (orig_teams == -2) + new_teams = 1; + else if (orig_teams == 0) + { + struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); + if (item != NULL) + new_teams = item->icvs.nteams; + } + /* The device-specific teams-thread-limit is only set if (a) an explicit TEAMS + region exists, i.e. ORIG_TEAMS > -2, and (b) THREADS was not already set by + e.g. a THREAD_LIMIT clause. */ + if (orig_teams >= -2 && orig_threads == 0) + { + struct gomp_offload_icv_list *item = gomp_get_offload_icv_item (device); + if (item != NULL) + new_threads = item->icvs.teams_thread_limit; + } + + /* Copy and change the arguments list only if TEAMS or THREADS need to be + updated. */ + void **new_args = args; + if (orig_teams != new_teams || orig_threads != new_threads) + { + size_t tms_len = (orig_teams == new_teams + ? teams_len + : (new_teams > -(1 << 15) && new_teams < (1 << 15) + ? 1 : 2)); + size_t ths_len = (orig_threads == new_threads + ? threads_len + : (new_threads > -(1 << 15) && new_threads < (1 << 15) + ? 1 : 2)); + /* One additional item after the last arg must be NULL. */ + size_t new_args_cnt = num_args - teams_len - threads_len + tms_len + + ths_len + 1; + new_args = (void **) gomp_alloca (new_args_cnt * sizeof (void*)); + + tmpargs = args; + void **tmp_new_args = new_args; + /* Copy all args except TEAMS and THREADS. TEAMS and THREADS are copied + too if they have not been changed and skipped otherwise. */ + while (*tmpargs) + { + intptr_t id = (intptr_t) *tmpargs; + if (((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_NUM_TEAMS + && orig_teams != new_teams) + || ((id & GOMP_TARGET_ARG_ID_MASK) == GOMP_TARGET_ARG_THREAD_LIMIT + && orig_threads != new_threads)) + { + tmpargs++; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + tmpargs++; + } + else + { + *tmp_new_args++ = *tmpargs++; + if (id & GOMP_TARGET_ARG_SUBSEQUENT_PARAM) + *tmp_new_args++ = *tmpargs++; + } + } + + /* Add the new TEAMS arg to the new args list if it has been changed. */ + if (orig_teams != new_teams) + { + intptr_t new_val = new_teams; + if (tms_len == 1) + { + new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) + | GOMP_TARGET_ARG_NUM_TEAMS; + *tmp_new_args++ = (void *) new_val; + } + else + { + *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM + | GOMP_TARGET_ARG_NUM_TEAMS); + *tmp_new_args++ = (void *) new_val; + } + } + + /* Add the new THREADS arg to the new args list if it has been changed. */ + if (orig_threads != new_threads) + { + intptr_t new_val = new_threads; + if (ths_len == 1) + { + new_val = (new_val << GOMP_TARGET_ARG_VALUE_SHIFT) + | GOMP_TARGET_ARG_THREAD_LIMIT; + *tmp_new_args++ = (void *) new_val; + } + else + { + *tmp_new_args++ = (void *) (GOMP_TARGET_ARG_SUBSEQUENT_PARAM + | GOMP_TARGET_ARG_THREAD_LIMIT); + *tmp_new_args++ = (void *) new_val; + } + } + + *tmp_new_args = NULL; + } + flags = clear_unsupported_flags (devicep, flags); if (flags & GOMP_TARGET_FLAG_NOWAIT) @@ -2839,7 +3013,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, && !thr->task->final_task) { gomp_create_target_task (devicep, fn, mapnum, hostaddrs, - sizes, kinds, flags, depend, args, + sizes, kinds, flags, depend, new_args, GOMP_TARGET_TASK_BEFORE_MAP); return; } @@ -2885,7 +3059,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, tgt_align, tgt_size); } } - gomp_target_fallback (fn, hostaddrs, devicep, args); + gomp_target_fallback (fn, hostaddrs, devicep, new_args); return; } @@ -2915,7 +3089,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, } devicep->run_func (devicep->target_id, fn_addr, tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, - args); + new_args); if (tgt_vars) { htab_clear (refcount_set); @@ -2923,6 +3097,12 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, } if (refcount_set) htab_free (refcount_set); + + /* Copy back ICVs from device to host. + HOST_PTR is expected to exist since it was added in + gomp_load_image_to_device if not already available. */ + gomp_copy_back_icvs (devicep, device); + } /* Host fallback for GOMP_target_data{,_ext} routines. */ diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c index b987a33..9da0d63 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-4.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-4.c @@ -16,7 +16,7 @@ main () } else omp_set_num_teams (6); - if (getenv ("OMP_TEAMS_THREAD_LIMIT") == NULL + if (getenv ("OMP_TEAMS_THREAD_LIMIT") != NULL && strcmp (getenv ("OMP_TEAMS_THREAD_LIMIT"), "12") == 0) { if (omp_get_teams_thread_limit () != 12) diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c index 431cfc7..57e960e 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-5.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-5.c @@ -1,25 +1,210 @@ +/* { dg-additional-options "-DAMD" { target offload_target_amdgcn } } */ /* { dg-do run } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "42" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "43" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "44" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "45" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "46" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS "47" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS "5" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" } */ #include #include +#include int main () { - if (omp_get_max_teams () != 47) + if (omp_get_max_teams () != 5 + || omp_get_teams_thread_limit () != 4) abort (); + #pragma omp teams + { + if (omp_get_num_teams () > 5 + || omp_get_team_num () >= 5) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 4 + || omp_get_thread_num () >= 4) + abort (); + } + + omp_set_num_teams (4); + omp_set_teams_thread_limit (3); + if (omp_get_max_teams () != 4 + || omp_get_teams_thread_limit () != 3) + abort (); + + #pragma omp teams + { + if (omp_get_num_teams () > 4 + || omp_get_team_num () >= 4) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + || omp_get_thread_num () >= 3) + abort (); + } + + #pragma omp teams num_teams(3) thread_limit(2) + { + if (omp_get_num_teams () != 3 + || omp_get_team_num () >= 3) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 2 + || omp_get_thread_num () >= 2) + abort (); + } + + #pragma omp teams num_teams(5) thread_limit(4) + { + if (omp_get_num_teams () != 5 + || omp_get_team_num () >= 5) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 4 + || omp_get_thread_num () >= 4) + abort (); + } + int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices (); - for (int i=0; i < num_devices; i++) - #pragma omp target device (i) - if (omp_get_max_teams () != 42 + i) + + for (int i = 0; i < num_devices; i++) + { + #pragma omp target device (i) + if (omp_get_max_teams () != 6 + i + || omp_get_teams_thread_limit () != 5 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + #pragma omp parallel + if (omp_get_thread_limit () > 5 + i + || omp_get_thread_num () >= 5 + i) + abort (); + + #pragma omp target device (i) + { + omp_set_num_teams (5 + i); + omp_set_teams_thread_limit (4 + i); + if (omp_get_max_teams () != 5 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + } + + /* omp_set_num_teams and omp_set_teams_thread_limit above set the value + of nteams-var and teams-thread-limit-var ICVs on device 'i', which has + scope 'device' and should be avaible in subsequent target regions. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 5 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > 5 + i + || omp_get_team_num () >= 5 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 4 + i + || omp_get_thread_num () >= 4 + i) + abort (); + } + + #pragma omp target device (i) + #pragma omp teams num_teams(6 + i) thread_limit(5 + i) + { + if (omp_get_num_teams () > 6 + i + || omp_get_team_num () >= 6 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 5 + i + || omp_get_thread_num () >= 5 + i + || omp_get_num_teams () > 6 + i + || omp_get_team_num () >= 6 + i) + abort (); + } + + #pragma omp target device (i) + #pragma omp teams num_teams(4 + i) thread_limit(3 + i) + { + if (omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + i + || omp_get_thread_num () >= 3 + i + || omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + } + + #pragma omp target device (i) + #pragma omp teams thread_limit(3 + i) num_teams(4 + i) + { + if (omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + i + || omp_get_thread_num () >= 3 + i + || omp_get_num_teams () > 4 + i + || omp_get_team_num () >= 4 + i) + abort (); + } + + /* The NUM_TEAMS and THREAD_LIMIT clauses should not change the values + of the corresponding ICVs. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 5 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + + /* This tests a large number of teams and threads. If it is larger than + 2^15+1 then the according argument in the kernels arguments list + is encoded with two items instead of one. On NVIDIA there is an + adjustment for too large teams and threads. For AMD such adjustment + exists only for threads and will cause runtime errors with a two large + number of teams. */ + intptr_t large_num_teams = 66000; +#ifdef AMD + large_num_teams = 8; +#endif + intptr_t large_threads_limit = 67000; + #pragma omp target device (i) + { + omp_set_num_teams (large_num_teams + i); + omp_set_teams_thread_limit (large_threads_limit + i); + if (omp_get_max_teams () != large_num_teams + i + || omp_get_teams_thread_limit () != large_threads_limit + i) + abort (); + } + + #pragma omp target device (i) + if (omp_get_max_teams () != large_num_teams + i + || omp_get_teams_thread_limit () != large_threads_limit + i) abort (); + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > large_num_teams + i + || omp_get_team_num () >= large_num_teams + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > large_threads_limit + i + || omp_get_thread_num () >= large_threads_limit + i) + abort (); + } + } + return 0; } diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c index e199a18..1b17f2d 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-6.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-6.c @@ -1,9 +1,10 @@ /* { dg-do run } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "43" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } */ /* { dg-set-target-env-var OMP_SCHEDULE_ALL "guided,4" } */ /* { dg-set-target-env-var OMP_DYNAMIC_ALL "true" } */ -/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "44" } */ /* { dg-set-target-env-var OMP_THREAD_LIMIT_ALL "45" } */ /* { dg-set-target-env-var OMP_NUM_THREADS_ALL "46,3,2" } */ /* { dg-set-target-env-var OMP_MAX_ACTIVE_LEVELS_ALL "47" } */ @@ -12,8 +13,8 @@ /* This tests the hierarchical usage of ICVs on the device, i.e. if OMP_NUM_TEAMS_DEV_ is not configured, then the value of - OMP_NUM_TEAMS_DEV should be used. And if there is no environment variable - without suffix, then the corresponding _ALL variant should be used. */ + OMP_NUM_TEAMS_DEV should be used. And if OMP_NUM_TEAMS (without suffix) is + not defined, then OMP_NUM_TEAMS_ALL should be used for the host. */ #include #include @@ -26,10 +27,10 @@ main () int chunk_size; omp_get_schedule(&kind, &chunk_size); - if ((!getenv ("OMP_NUM_TEAMS") && omp_get_max_teams () != 42) + if ((!getenv ("OMP_NUM_TEAMS") && omp_get_max_teams () != 3) || (!getenv ("OMP_DYNAMIC") && !omp_get_dynamic ()) || (!getenv ("OMP_SCHEDULE") && (kind != 3 || chunk_size != 4)) - || (!getenv ("OMP_TEAMS_THREAD_LIMIT") && omp_get_teams_thread_limit () != 44) + || (!getenv ("OMP_TEAMS_THREAD_LIMIT") && omp_get_teams_thread_limit () != 2) || (!getenv ("OMP_THREAD_LIMIT") && omp_get_thread_limit () != 45) || (!getenv ("OMP_NUM_THREADS") && omp_get_max_threads () != 46) || (!getenv ("OMP_PROC_BIND") && omp_get_proc_bind () != omp_proc_bind_spread) @@ -44,9 +45,51 @@ main () name[sizeof ("OMP_NUM_TEAMS_DEV_1") - 2] = '0' + i; if (getenv (name)) continue; - #pragma omp target device (i) - if (omp_get_max_teams () != 43) + + #pragma omp target device (i) + if (omp_get_max_teams () != 4 + || omp_get_teams_thread_limit () != 3) abort (); + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > 4 + || omp_get_team_num () >= 4) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 3 + || omp_get_thread_num () >= 3) + abort (); + } + + #pragma omp target device (i) + { + omp_set_num_teams (3 + i); + omp_set_teams_thread_limit (2 + i); + if (omp_get_max_teams () != 3 + i + || omp_get_teams_thread_limit () != 2 + i) + abort (); + } + + /* omp_set_num_teams above set the value of nteams-var ICV on device 'i', + which has scope 'device' and should be avaible in subsequent target + regions. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 3 + i + || omp_get_teams_thread_limit () != 2 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + { + if (omp_get_num_teams () > 3 + i + || omp_get_team_num () >= 3 + i) + abort (); + #pragma omp parallel + if (omp_get_thread_limit () > 2 + i + || omp_get_thread_num () >= 2 + i) + abort (); + } } return 0; diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c index 70a716d..bbbd6df 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/icv-7.c +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-7.c @@ -1,5 +1,6 @@ /* { dg-do run } */ -/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "42" } */ +/* { dg-set-target-env-var OMP_NUM_TEAMS_ALL "7" } */ +/* { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } */ /* This tests the hierarchical usage of ICVs on the host and on devices, i.e. if OMP_NUM_TEAMS_DEV_, OMP_NUM_TEAMS_DEV, and @@ -9,18 +10,90 @@ #include #include +#include int main () { - if (omp_get_max_teams () != 42) + if ((!getenv ("OMP_NUM_TEAMS") && omp_get_max_teams () != 7) + || (!getenv ("OMP_TEAMS_THREAD_LIMIT") && omp_get_teams_thread_limit () != 2)) abort (); + #pragma omp teams + if ((!getenv ("OMP_NUM_TEAMS")) + && (omp_get_num_teams () > 7 || omp_get_team_num () >= 7)) + abort (); + + omp_set_num_teams (9); + omp_set_teams_thread_limit (3); + if (omp_get_max_teams () != 9 + || omp_get_teams_thread_limit () != 3) + abort (); + + #pragma omp teams + if (omp_get_num_teams () > 9 + || omp_get_team_num () >= 9) + abort (); + + #pragma omp teams num_teams(5) + if (omp_get_num_teams () > 5 + || omp_get_team_num () >= 5) + abort (); + + if (getenv ("OMP_NUM_TEAMS_DEV") || getenv ("OMP_TEAMS_THREAD_LIMIT_DEV")) + return 0; + int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices (); - for (int i=0; i < num_devices; i++) - #pragma omp target device (i) - if (omp_get_max_teams () != 42) + for (int i = 0; i < num_devices; i++) + { + char nteams[sizeof ("OMP_NUM_TEAMS_DEV_1")]; + strcpy (nteams, "OMP_NUM_TEAMS_DEV_1"); + nteams[sizeof ("OMP_NUM_TEAMS_DEV_1") - 2] = '0' + i; + char teams_thread_limit[sizeof ("OMP_TEAMS_THREAD_LIMIT_DEV_1")]; + strcpy (teams_thread_limit, "OMP_TEAMS_THREAD_LIMIT_DEV_1"); + teams_thread_limit[sizeof ("OMP_TEAMS_THREAD_LIMIT_DEV_1") - 2] = '0' + i; + if (getenv (nteams) || getenv (teams_thread_limit)) + continue; + + #pragma omp target device (i) + if (omp_get_max_teams () != 7 + || omp_get_teams_thread_limit () != 2) + abort (); + + #pragma omp target device (i) + #pragma omp teams + if (omp_get_num_teams () > 7 + || omp_get_team_num () >= 7) + abort (); + + #pragma omp target device (i) + { + omp_set_num_teams (8 + i); + omp_set_teams_thread_limit (4 + i); + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + } + + /* omp_set_num_teams above set the value of nteams-var ICV on device 'i', + which has scope 'device' and should be avaible in subsequent target + regions. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 4 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + if (omp_get_num_teams () > 8 + i + || omp_get_team_num () >= 8 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams num_teams(5 + i) + if (omp_get_num_teams () != 5 + i) abort (); + } return 0; } diff --git a/libgomp/testsuite/libgomp.c-c++-common/icv-9.c b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c new file mode 100644 index 0000000..21b874f --- /dev/null +++ b/libgomp/testsuite/libgomp.c-c++-common/icv-9.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ + +/* This tests usage of ICVs on the host and on devices if no corresponding + environment variables are configured. */ + +#include +#include + +int +main () +{ + if (omp_get_max_teams () != 0 + || omp_get_teams_thread_limit () != 0) + abort (); + + omp_set_num_teams (9); + omp_set_teams_thread_limit (2); + if (omp_get_max_teams () != 9 + || omp_get_teams_thread_limit () != 2) + abort (); + + #pragma omp teams + if (omp_get_num_teams () > 9 + || omp_get_team_num () >= 9) + abort (); + + #pragma omp teams num_teams(5) + if (omp_get_num_teams () > 5 + || omp_get_team_num () >= 5) + abort (); + + int num_devices = omp_get_num_devices () > 3 ? 3 : omp_get_num_devices (); + for (int i = 0; i < num_devices; i++) + { + #pragma omp target device (i) + if (omp_get_max_teams () != 0 + || omp_get_teams_thread_limit () != 0) + abort (); + + #pragma omp target device (i) + { + omp_set_num_teams (8 + i); + omp_set_teams_thread_limit (3 + i); + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 3 + i) + abort (); + } + + /* omp_set_num_teams above set the value of nteams-var ICV on device 'i', + which has scope 'device' and should be avaible in subsequent target + regions. */ + #pragma omp target device (i) + if (omp_get_max_teams () != 8 + i + || omp_get_teams_thread_limit () != 3 + i) + abort (); + + #pragma omp target device (i) + #pragma omp teams + if (omp_get_num_teams () > 8 + i + || omp_get_team_num () >= 8 + i) + abort (); + + /* NUM_TEAMS clause has priority over previously set NUM_TEAMS value. */ + #pragma omp target device (i) + #pragma omp teams num_teams(5 + i) + if (omp_get_num_teams () > 5 + i + || omp_get_team_num () >= 5 + i) + abort (); + } + + return 0; +} diff --git a/libgomp/testsuite/libgomp.fortran/icv-5.f90 b/libgomp/testsuite/libgomp.fortran/icv-5.f90 new file mode 100644 index 0000000..05a35fa --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/icv-5.f90 @@ -0,0 +1,226 @@ +! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } +! { dg-set-target-env-var OMP_NUM_TEAMS "5" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_0 "6" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_1 "7" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV_2 "8" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT "4" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_0 "5" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_1 "6" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV_2 "7" } + +use omp_lib +implicit none (type, external) + integer :: num_devices, i, large_num_teams, large_threads_limit + logical :: err + + if (omp_get_num_devices () > 3) then + num_devices = 3 + else + num_devices = omp_get_num_devices () + end if + + do i=0,num_devices-1 + + ! Testing NUM_TEAMS. + if (env_is_set_dev ("OMP_NUM_TEAMS_DEV_", i, 6 + i)) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 6 + i) err = .true. + !$omp end target + if (err) stop 1 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 2 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_num_teams (5 + i) + if (omp_get_max_teams () /= 5 + i) err = .true. + !$omp end target + if (err) stop 3 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 5 + i) err = .true. + !$omp end target + if (err) stop 4 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 5 + i .or. omp_get_team_num () >= 5 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 5 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams num_teams(6 + i) + if (omp_get_num_teams () > 6 + i .or. omp_get_team_num () >= 6 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 6 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams num_teams(4 + i) + if (omp_get_num_teams () > 4 + i .or. omp_get_team_num () >= 4 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 7 + + large_num_teams = 66000 + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_num_teams (large_num_teams + i) + if (omp_get_max_teams () /= large_num_teams + i) err = .true. + !$omp end target + if (err) stop 8 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= large_num_teams + i) err = .true. + !$omp end target + if (err) stop 9 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > large_num_teams + i & + .or. omp_get_team_num () >= large_num_teams + i) err = .true. + !$omp end teams + !$omp end target + if (err) stop 10 + end if + + ! Testing TEAMS-THREAD-LIMIT + if (env_is_set_dev ("OMP_TEAMS_THREAD_LIMIT_DEV_", i, 5 + i)) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 5 + i) err = .true. + !$omp end target + if (err) stop 11 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 12 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_teams_thread_limit (4 + i) + if (omp_get_teams_thread_limit () /= 4 + i) err = .true. + !$omp end target + if (err) stop 13 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 4 + i) err = .true. + !$omp end target + if (err) stop 14 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 4 + i .or. omp_get_thread_num () >= 4 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 15 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams thread_limit(5 + i) + !$omp parallel + if (omp_get_thread_limit () > 5 + i .or. omp_get_thread_num () >= 5 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 16 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams thread_limit(3 + i) + !$omp parallel + if (omp_get_thread_limit () > 3 + i .or. omp_get_thread_num () >= 3 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 17 + + large_threads_limit = 67000 + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_teams_thread_limit (large_threads_limit + i) + if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true. + !$omp end target + if (err) stop 18 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= large_threads_limit + i) err = .true. + !$omp end target + if (err) stop 19 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > large_threads_limit + i & + .or. omp_get_thread_num () >= large_threads_limit + i) err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 20 + end if + + end do + +contains + logical function env_is_set (name, val) + character(len=*) :: name, val + character(len=40) :: val2 + integer :: stat + call get_environment_variable (name, val2, status=stat) + if (stat == 0) then + if (val == val2) then + env_is_set = .true. + return + end if + else if (stat /= 1) then + error stop 30 + endif + env_is_set = .false. + end + logical function env_is_set_dev (name, dev_num, val) + character(len=*) :: name + integer :: dev_num, val + character(len=64) :: dev_num_str, env_var, val_str + dev_num_str = ADJUSTL(dev_num_str) + env_var = name // dev_num_str + val_str = ADJUSTL(val_str) + env_is_set_dev = env_is_set (TRIM(env_var), TRIM(val_str)) + end +end diff --git a/libgomp/testsuite/libgomp.fortran/icv-6.f90 b/libgomp/testsuite/libgomp.fortran/icv-6.f90 new file mode 100644 index 0000000..c8e6a0d --- /dev/null +++ b/libgomp/testsuite/libgomp.fortran/icv-6.f90 @@ -0,0 +1,140 @@ +! { dg-set-target-env-var OMP_NUM_TEAMS_ALL "3" } +! { dg-set-target-env-var OMP_NUM_TEAMS_DEV "4" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_ALL "2" } +! { dg-set-target-env-var OMP_TEAMS_THREAD_LIMIT_DEV "3" } + +! This test considers the hierarchical usage of ICVs on the device, i.e. if +! e.g. OMP_NUM_TEAMS_DEV_ is not configured, then the value of +! OMP_NUM_TEAMS_DEV should be used for the targets. + +use omp_lib +implicit none (type, external) + integer :: num_devices, i, stat, tmp + logical :: err + character(len=40) :: val + + ! The following environment variables should not be set. + call get_environment_variable ("OMP_NUM_TEAMS_DEV_0", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_NUM_TEAMS_DEV_1", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_NUM_TEAMS_DEV_2", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_0", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_1", val, status=stat) + if (stat /= 1) return + call get_environment_variable ("OMP_TEAMS_THREAD_LIMIT_DEV_2", val, status=stat) + if (stat /= 1) return + + if (omp_get_num_devices () > 3) then + num_devices = 3 + else + num_devices = omp_get_num_devices () + end if + + do i=0,num_devices-1 + + ! Testing NUM_TEAMS. + if (env_is_set ("OMP_NUM_TEAMS_DEV", "4")) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 4) err = .true. + !$omp end target + if (err) stop 1 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 4 .or. omp_get_team_num () >= 4) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 2 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_num_teams (3 + i) + if (omp_get_max_teams () /= 3 + i) err = .true. + !$omp end target + if (err) stop 3 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_max_teams () /= 3 + i) err = .true. + !$omp end target + if (err) stop 4 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + if (omp_get_num_teams () > 3 + i .or. omp_get_team_num () >= 3 + i) & + err = .true. + !$omp end teams + !$omp end target + if (err) stop 5 + end if + + ! Testing TEAMS-THREAD-LIMIT + if (env_is_set ("OMP_TEAMS_THREAD_LIMIT_DEV", "3")) then + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 3) err = .true. + !$omp end target + if (err) stop 6 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 3 .or. omp_get_thread_num () >= 3) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 7 + + err = .false. + !$omp target device(i) map(tofrom: err) + call omp_set_teams_thread_limit (2 + i) + if (omp_get_teams_thread_limit () /= 2 + i) err = .true. + !$omp end target + if (err) stop 8 + + err = .false. + !$omp target device(i) map(tofrom: err) + if (omp_get_teams_thread_limit () /= 2 + i) err = .true. + !$omp end target + if (err) stop 9 + + err = .false. + !$omp target device(i) map(tofrom: err) + !$omp teams + !$omp parallel + if (omp_get_thread_limit () > 2 + i .or. omp_get_thread_num () >= 2 + i) & + err = .true. + !$omp end parallel + !$omp end teams + !$omp end target + if (err) stop 10 + end if + + end do + +contains + logical function env_is_set (name, val) + character(len=*) :: name, val + character(len=40) :: val2 + integer :: stat + call get_environment_variable (name, val2, status=stat) + if (stat == 0) then + if (val == val2) then + env_is_set = .true. + return + end if + else if (stat /= 1) then + error stop 10 + endif + env_is_set = .false. + end +end