Hi Jakub, Am 22.08.2022 um 17:35 schrieb Jakub Jelinek: >> +/* Default values of ICVs according to the OpenMP standard. */ >> +struct gomp_default_icv_t gomp_default_icv_values = { >> + .run_sched_var = GFS_DYNAMIC, >> + .run_sched_chunk_size = 1, >> + .max_active_levels_var = 1, >> + .bind_var = omp_proc_bind_false, >> + .nteams_var = 0, >> + .teams_thread_limit_var = 0, >> + .default_device_var = 0 >> +}; > > Why this var (and if it is really needed, why it isn't const)? > You seem to be using only 2 fields from it: > libgomp/libgomp.h:extern struct gomp_default_icv_t gomp_default_icv_values; > libgomp/env.c:struct gomp_default_icv_t gomp_default_icv_values = { > libgomp/target.c: new->icvs.nteams = gomp_default_icv_values.nteams_var; > libgomp/target.c: new->icvs.default_device = gomp_default_icv_values.default_device_var; gomp_default_icv_values is used to store the default values of the ICVs as defined in the OpenMP standard. Previously this was not necessary since there were only host-related ICVs being initialized with the corresponding default values in gomp_global_icv. gomp_global_icv cannot be used to get the default values in general as they are overwritten as soon as we parse an environment variable for a host-related ICV. In contrast we need the default values not only at the very beginning when we parse the environment variables, but also when we create device-specific ICV structs. The point in time when a device-specific ICV struct is created can be when a particular device is used first (as we don't know the device numbers during parsing the environment variables). I introduced gomp_default_icv_values in order to have the default ICV values centralized (like in gomp_global_icv before) for a better readability and maintanance. As you stated correctly below, there were still multiple places using the defaults directly. I modified that. > >> + >> bool gomp_cancel_var = false; >> enum gomp_target_offload_t gomp_target_offload_var >> = GOMP_TARGET_OFFLOAD_DEFAULT; >> @@ -104,86 +123,94 @@ int goacc_default_dims[GOMP_DIM_MAX]; >> static int wait_policy; >> static unsigned long stacksize = GOMP_DEFAULT_STACKSIZE; >> >> -/* Parse the OMP_SCHEDULE environment variable. */ >> - >> static void >> -parse_schedule (void) >> +print_env_var_error (const char *env, const char *val) >> { >> - char *env, *end; >> + char name[val - env]; >> + memcpy (name, env, val - env - 1); >> + name[val - env - 1] = '\0'; >> + gomp_error ("Invalid value for environment variable %s: %s", name, val); > > Why the temporary buffer (especially VLA)? > Just > gomp_error ("Invalid value for environment variable %.*s: %s", > (int) (val - env - 1), env, val); > should do the job. Good hint, thanks! (changed) > >> +/* Parse the OMP_SCHEDULE environment variable. */ >> +static bool >> +parse_schedule (const char *env, const char *val, void * const params[]) > > No space after * Changed. > >> +#define ENTRY(NAME) NAME, sizeof (NAME) - 1 >> +static const struct envvar >> +{ >> + const char *name; >> + int name_len; >> + uint8_t flag_vars[3]; >> + uint8_t flag; >> + bool (*parse_func) (const char *, const char *, void * const[]); >> +} envvars[] = { >> + { ENTRY ("OMP_SCHEDULE"), >> + { GOMP_ICV_SCHEDULE, GOMP_ICV_SCHEDULE_CHUNK_SIZE }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_schedule }, >> + { ENTRY ("OMP_NUM_TEAMS"), >> + { GOMP_ICV_NTEAMS }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_int }, >> + { ENTRY ("OMP_DYNAMIC"), >> + { GOMP_ICV_DYNAMIC }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_boolean }, >> + { ENTRY ("OMP_TEAMS_THREAD_LIMIT"), >> + { GOMP_ICV_TEAMS_THREAD_LIMIT }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_int }, >> + { ENTRY ("OMP_THREAD_LIMIT"), >> + { GOMP_ICV_THREAD_LIMIT }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_unsigned_long }, >> + { ENTRY ("OMP_NUM_THREADS"), >> + { GOMP_ICV_NTHREADS, GOMP_ICV_NTHREADS_LIST, GOMP_ICV_NTHREADS_LIST_LEN }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_unsigned_long_list }, >> + { ENTRY ("OMP_PROC_BIND"), >> + { GOMP_ICV_BIND, GOMP_ICV_BIND_LIST, GOMP_ICV_BIND_LIST_LEN }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_bind_var }, >> + { ENTRY ("OMP_MAX_ACTIVE_LEVELS"), >> + { GOMP_ICV_MAX_ACTIVE_LEVELS }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_unsigned_long }, >> + { ENTRY ("OMP_WAIT_POLICY"), >> + { GOMP_ICV_WAIT_POLICY }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_wait_policy }, >> + { ENTRY ("OMP_STACKSIZE"), >> + { GOMP_ICV_STACKSIZE }, >> + GOMP_ENV_SUFFIX_DEV | GOMP_ENV_SUFFIX_ALL | GOMP_ENV_SUFFIX_DEV_X, >> + &parse_stacksize }, >> + { ENTRY ("OMP_CANCELLATION"), { GOMP_ICV_CANCELLATION }, 0, &parse_boolean }, >> + { ENTRY ("OMP_DISPLAY_AFFINITY"), { GOMP_ICV_DISPLAY_AFFINITY }, 0, >> + &parse_boolean }, >> + { ENTRY ("OMP_TARGET_OFFLOAD"), { GOMP_ICV_TARGET_OFFLOAD }, 0, >> + &parse_target_offload }, >> + { ENTRY ("OMP_MAX_TASK_PRIORITY"), { GOMP_ICV_MAX_TASK_PRIORITY }, 0, >> + &parse_int }, >> + { ENTRY ("OMP_ALLOCATOR"), { GOMP_ICV_ALLOCATOR }, 0, &parse_allocator }, >> + { ENTRY ("OMP_DEFAULT_DEVICE"), { GOMP_ICV_DEFAULT_DEVICE }, 0, &parse_int } > > All the entries in the table start with "OMP_" prefix, isn't it wasteful > to include that prefix in name field and name_len? > I mean, places that use the name to print it can just use OMP_%s and the > matching doesn't need to compare it again. Cutting the environment variable names in pieces worsens perhaps the readability but as we have many "OMP_"s that we can reduce, I agree with you (changed). > >> +static bool >> +get_device_num (char *env, char *dev_num_ptr, int *dev_num, int *dev_num_len) >> +{ >> + char *end; >> + int pos = 0; >> + >> + if (dev_num_ptr[0] == '-') >> + { >> + gomp_error ("Non-negative device number expected in %s", env); >> + return false; >> + } >> + >> + while (pos <= INT_MAX_STR_LEN) >> + { >> + if (dev_num_ptr[pos] == '\0' || dev_num_ptr[pos] == '=') >> + break; >> + pos++; >> + } >> + if (pos > INT_MAX_STR_LEN) >> + { >> + gomp_error ("Invalid device number in %s (too long)", env); >> + return false; >> + } > > I don't understand the purpose of the above loop and if. The purpose of the loop is to limit the number of digits that are parsed for the device number. Theoretically an environment variable name can be very large and we don't know what a user specifies. Moreover, it's used to recognize that the device number is not just invalid but that it is too long. Assuming that strtoul does only parse a reasonable number of digits even if the device number is larger than ULONG_MAX and if we don't want to distinguish different invalid cases for the error output, I agree to use strtoul only. > >> + >> + *dev_num = (int) strtoul (dev_num_ptr, &end, 10); > > Just call strtoul and verify afterwards using end you get, no need to walk > it separately. > I wouldn't cast to int before verification that it fits though. > So perhaps > char *end; > unsigned long val = strtoul (dev_num_ptr, &end, 10); > > if (val > INT_MAX > || *end != '=' > || (dev_num_ptr[0] == '0' && val != 0) > || (dev_num_ptr[0] < '0' || dev_num_ptr[0] > '9')) > { > gomp_error ("Invalid device number in %s", env); > return false; > } > *dev_num = val; > *dev_num_len = end - dev_num_ptr; > return true; > or so? No need to differentiate different cases, and you want to > rule out "OMP_SCHEDULE_DEV_ \t+3=dynamic,3" too. > Changed and also added a test case for leading whitespaces in icv-8.c. >> + if (dev_num_ptr[0] == '0' && *dev_num != 0) >> + { >> + gomp_error ("Invalid device number in %s (leading zero)", env); >> + return false; >> + } >> + if (dev_num_ptr == end || *end != '=') >> + { >> + gomp_error ("Invalid device number in %s", env); >> + return false; >> + } >> + >> + *dev_num_len = pos; >> + return true; >> +} >> + > >> +static uint32_t * >> +add_initial_icv_to_list (int dev_num, int icv_code, void *icv_addr[3]) >> +{ >> + struct gomp_icv_list *last = NULL, *l = gomp_initial_icv_list; >> + while (l != NULL && l->device_num != dev_num) >> + { >> + last = l; >> + l = l->next; >> + } >> + >> + if (l == NULL) >> + { >> + l >> + = (struct gomp_icv_list *) gomp_malloc (sizeof (struct gomp_icv_list)); > > What is the advantage of adding there a newline after the var name? > The = is indented 2 columns to the right from var name, so having it on one > line has the same length. I guess that is just left over after a variable renaming from a longer name to "l" which previously exceeded the line lenght limitation. Corrected. > >> + l->device_num = dev_num; >> + memset (&(l->icvs), 0, sizeof (struct gomp_initial_icvs)); > > You could use gomp_malloc_cleared (then format it as > l = ((struct gomp_icv_list *) > gomp_malloc_cleared (sizeof (struct gomp_icv_list))); Thanks for the hint. Changed. > >> + l->flags = 0; >> + if (dev_num < 0) >> + { >> + l->next = gomp_initial_icv_list; >> + gomp_initial_icv_list = l; >> + } >> + else >> + { >> + l->next = NULL; >> + if (last == NULL) >> + gomp_initial_icv_list = l; >> + else >> + last->next = l; >> + } >> + } >> + >> + get_icv_member_addr (&(l->icvs), icv_code, icv_addr); > > No need for the ()s around l->icvs, &l->icvs will do (multiple places, > for other fields and other pointers too). Corrected at several places. >> + >> + return &(l->flags); > > Similarly. Corrected. >> + >> + /* Initial values for host environment variables should always exist even if >> + there is no explicitly set host environment variable. Moreover, they are >> + set to the initial global values. */ >> + add_initial_icv_to_list (-3, 0, NULL); >> + none = gomp_get_initial_icv_item (-3); > > Can you please add an enum for these -3/-2/-1 values and use it? Enum added and -3/-2/-1 replaced accordingly. > >> + none->icvs.nthreads_var = 1; >> + none->icvs.thread_limit_var = UINT_MAX; >> + none->icvs.run_sched_var = GFS_DYNAMIC; >> + none->icvs.run_sched_chunk_size = 1; >> + none->icvs.default_device_var = 0; >> + none->icvs.dyn_var = false; >> + none->icvs.max_active_levels_var = 1; >> + none->icvs.bind_var = omp_proc_bind_false; > > So, is this essentially a 3rd copy of the defaults? > gomp_global_icv has it, gomp_default_icv_values (partially) and > this spot too. Don't we want to have it initialized once and copy over? This concerns two aspects: (a) Having two structs for the host-related ICV values, one for holding the initial values for omp_display_env and another one for the changeable values. Both need to be initialized. (b) Using the concrete default values at multiple locations. This is changed now (as explained above), such that we now have only one place for the definition of ICV defaults. >> + >> + for (env = environ; *env != 0; env++) >> + { >> + if (!startswith (*env, "OMP_")) >> + continue; >> + >> + for (omp_var = 0; omp_var < OMP_VAR_CNT; omp_var++) >> + { >> + if (startswith (*env, envvars[omp_var].name)) > > As I said elsewhere, > if (startswith (*env + sizeof ("OMP_") - 1, envvars[omp_var].name)) > instead? > Or have a temporary var set to *env + sizeof ("OMP_") - 1; and use > it instead of *env. > The amount of &(*env)[ etc. spots that could be just &name[ is big. The suggested variable is introduced now. > >> + { >> + pos = envvars[omp_var].name_len; >> + if ((*env)[pos] == '=') >> + { >> + pos++; >> + flag_var_addr >> + = add_initial_icv_to_list (-3, >> + envvars[omp_var].flag_vars[0], >> + params); >> + } >> + else if (startswith (&(*env)[pos], "_DEV=") >> + && envvars[omp_var].flag & GOMP_ENV_SUFFIX_DEV) >> + { >> + pos += 5; >> + flag_var_addr >> + = add_initial_icv_to_list (-1, >> + envvars[omp_var].flag_vars[0], >> + params); >> + } >> + else if (startswith (&(*env)[pos], "_ALL=") >> + && envvars[omp_var].flag & GOMP_ENV_SUFFIX_ALL) >> + { >> + pos += 5; >> + flag_var_addr >> + = add_initial_icv_to_list (-2, >> + envvars[omp_var].flag_vars[0], >> + params); >> + } >> + else if (startswith (&(*env)[pos], "_DEV_") >> + && envvars[omp_var].flag & GOMP_ENV_SUFFIX_DEV_X) >> + { >> + pos += 5; >> + if (!get_device_num (*env, &(*env)[pos], &dev_num, >> + &dev_num_len)) >> + goto next_var; > > Isn't goto next_var; equivalent to just break; ? Yes, changed. > >> + >> + pos += dev_num_len + 1; >> + flag_var_addr >> + = add_initial_icv_to_list (dev_num, >> + envvars[omp_var].flag_vars[0], >> + params); >> + } >> + else >> + { >> + gomp_error ("Invalid device number in %s", *env); >> + break; >> + } >> + env_val = &(*env)[pos]; >> + >> + if (envvars[omp_var].parse_func (*env, env_val, params)) >> + { >> + for (i = 0; i < 3; ++i) >> + if (envvars[omp_var].flag_vars[i]) >> + gomp_set_icv_flag (flag_var_addr, >> + envvars[omp_var].flag_vars[i]); >> + else >> + break; >> + } >> + >> + break; >> + } >> + } >> + >> + next_var: > > So by using break; above we could drop this. Removed. > >> +struct gomp_default_icv_t >> +{ >> + enum gomp_schedule_type run_sched_var; >> + int run_sched_chunk_size; >> + unsigned char max_active_levels_var; >> + char bind_var; >> + int nteams_var; >> + int teams_thread_limit_var; >> + int default_device_var; >> +}; >> +extern struct gomp_default_icv_t gomp_default_icv_values; > > Please don't mix the type definitions with extern declarations and > with function prototypes. > Function prototypes should go to the section of libgomp.h that starts > with /* Function prototypes. */ comment and should go under section > corresponding to the filename that provides the definition. > For externs, there is a large block of extern declarations. Thanks for the hint. I have re-sorted the according parts and hope that it's ok now. > >> + struct gomp_icv_list >> + { >> + int device_num; >> + struct gomp_initial_icvs icvs; >> + uint32_t flags; > > Put flags after device_num to avoid some useless padding? Good spot. Changed. > >> + struct gomp_icv_list *next; >> + }; > >> +extern struct gomp_icv_list* gomp_add_device_specific_icv > > * should go after space, not before. Corrected. > >> +int >> +main (int argc, char *const *argv) > > No need for "int argc, char *const *argv" when you aren't using them, > just > int > main () This is just left over from the execv approach that I removed. Corrected. The patch was tested on x86_64-linux with nvptx and amdgcn offloading without regressions. Marcel ----------------- Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955