Hi, The patch below contains all changes to libgomp files except for the hsa plugin (which is in the following patch). The patch is a re-post of https://gcc.gnu.org/ml/gcc-patches/2015-12/msg01288.html but we have incorporated a number of requests from the feedback. From the subsequent communications with Jakub, I have the feeling he is fine with the changes. But perhaps he or someone else would like to have one more look. Thanks, Martin 2016-01-13 Martin Jambor include/ * gomp-constants.h (GOMP_DEVICE_HSA): New macro. (GOMP_VERSION_HSA): Likewise. (GOMP_TARGET_ARG_DEVICE_MASK): Likewise. (GOMP_TARGET_ARG_DEVICE_ALL): Likewise. (GOMP_TARGET_ARG_SUBSEQUENT_PARAM): Likewise. (GOMP_TARGET_ARG_ID_MASK): Likewise. (GOMP_TARGET_ARG_NUM_TEAMS): Likewise. (GOMP_TARGET_ARG_THREAD_LIMIT): Likewise. (GOMP_TARGET_ARG_VALUE_SHIFT): Likewise. (GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES): Likewise. libgomp/ * libgomp-plugin.h (offload_target_type): New element OFFLOAD_TARGET_TYPE_HSA. * libgomp.h (gomp_target_task): New fields firstprivate_copies and args. (bool gomp_create_target_task): Updated. (gomp_device_descr): Extra parameter of run_func and async_run_func, new field can_run_func. * libgomp_g.h (GOMP_target_ext): Update prototype. * oacc-host.c (host_run): Added a new parameter args. * target.c (calculate_firstprivate_requirements): New function. (copy_firstprivate_data): Likewise. (gomp_target_fallback_firstprivate): Use them. (gomp_target_unshare_firstprivate): New function. (gomp_get_target_fn_addr): Allow returning NULL for shared memory devices. (GOMP_target): Do host fallback for all shared memory devices. Do not pass any args to plugins. (GOMP_target_ext): Introduce device-specific argument parameter args. Allow host fallback if device shares memory. Do not remap data if device has shared memory. (gomp_target_task_fn): Likewise. Also treat shared memory devices like host fallback for mappings. (GOMP_target_data): Treat shared memory devices like host fallback. 
(GOMP_target_data_ext): Likewise. (GOMP_target_update): Likewise. (GOMP_target_update_ext): Likewise. Also pass NULL as args to gomp_create_target_task. (GOMP_target_enter_exit_data): Likewise. (omp_target_alloc): Treat shared memory devices like host fallback. (omp_target_free): Likewise. (omp_target_is_present): Likewise. (omp_target_memcpy): Likewise. (omp_target_memcpy_rect): Likewise. (omp_target_associate_ptr): Likewise. (gomp_load_plugin_for_device): Also load can_run. * task.c (GOMP_PLUGIN_target_task_completion): Free firstprivate_copies. (gomp_create_target_task): Accept new argument args and store it to ttask. liboffloadmic/plugin * libgomp-plugin-intelmic.cpp (GOMP_OFFLOAD_async_run): New unused parameter. (GOMP_OFFLOAD_run): Likewise. diff --git a/include/gomp-constants.h b/include/gomp-constants.h index dffd631..a8e7723 100644 --- a/include/gomp-constants.h +++ b/include/gomp-constants.h @@ -176,6 +176,7 @@ enum gomp_map_kind #define GOMP_DEVICE_NOT_HOST 4 #define GOMP_DEVICE_NVIDIA_PTX 5 #define GOMP_DEVICE_INTEL_MIC 6 +#define GOMP_DEVICE_HSA 7 #define GOMP_DEVICE_ICV -1 #define GOMP_DEVICE_HOST_FALLBACK -2 @@ -201,6 +202,7 @@ enum gomp_map_kind #define GOMP_VERSION 0 #define GOMP_VERSION_NVIDIA_PTX 1 #define GOMP_VERSION_INTEL_MIC 0 +#define GOMP_VERSION_HSA 0 #define GOMP_VERSION_PACK(LIB, DEV) (((LIB) << 16) | (DEV)) #define GOMP_VERSION_LIB(PACK) (((PACK) >> 16) & 0xffff) @@ -228,4 +230,30 @@ enum gomp_map_kind #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0xffff) #define GOMP_LAUNCH_OP_MAX 0xffff +/* Bitmask to apply in order to find out the intended device of a target + argument. */ +#define GOMP_TARGET_ARG_DEVICE_MASK ((1 << 7) - 1) +/* The target argument is significant for all devices. */ +#define GOMP_TARGET_ARG_DEVICE_ALL 0 + +/* Flag set when the value of the argument is passed in the subsequent + element of the target argument array. 
*/ +#define GOMP_TARGET_ARG_SUBSEQUENT_PARAM (1 << 7) + +/* Bitmask to apply to a target argument to find out the value identifier. */ +#define GOMP_TARGET_ARG_ID_MASK (((1 << 8) - 1) << 8) +/* Target argument index of NUM_TEAMS. */ +#define GOMP_TARGET_ARG_NUM_TEAMS (1 << 8) +/* Target argument index of THREAD_LIMIT. */ +#define GOMP_TARGET_ARG_THREAD_LIMIT (2 << 8) + +/* If the value is directly embedded in the target argument, it should be at + most 16 bits wide and shifted by this many bits. */ +#define GOMP_TARGET_ARG_VALUE_SHIFT 16 + +/* HSA specific data structures. */ + +/* Identifiers of device-specific target arguments. */ +#define GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES (1 << 8) + #endif diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index 64035e4..53f9248 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -48,7 +48,8 @@ enum offload_target_type OFFLOAD_TARGET_TYPE_HOST = 2, /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed. */ OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5, - OFFLOAD_TARGET_TYPE_INTEL_MIC = 6 + OFFLOAD_TARGET_TYPE_INTEL_MIC = 6, + OFFLOAD_TARGET_TYPE_HSA = 7 }; /* Auxiliary struct, used for transferring pairs of addresses from plugin diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 6ddde56..7108a6d 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -496,6 +496,10 @@ struct gomp_target_task struct target_mem_desc *tgt; struct gomp_task *task; struct gomp_team *team; + /* Copies of firstprivate mapped data for shared memory accelerators. */ + void *firstprivate_copies; + /* Device-specific target arguments. 
*/ + void **args; void *hostaddrs[]; }; @@ -750,7 +754,8 @@ extern void gomp_task_maybe_wait_for_dependencies (void **); extern bool gomp_create_target_task (struct gomp_device_descr *, void (*) (void *), size_t, void **, size_t *, unsigned short *, unsigned int, - void **, enum gomp_target_task_state); + void **, void **, + enum gomp_target_task_state); static void inline gomp_finish_task (struct gomp_task *task) @@ -937,8 +942,9 @@ struct gomp_device_descr void *(*dev2host_func) (int, void *, const void *, size_t); void *(*host2dev_func) (int, void *, const void *, size_t); void *(*dev2dev_func) (int, void *, const void *, size_t); - void (*run_func) (int, void *, void *); - void (*async_run_func) (int, void *, void *, void *); + bool (*can_run_func) (void *); + void (*run_func) (int, void *, void *, void **); + void (*async_run_func) (int, void *, void *, void **, void *); /* Splay tree containing information about mapped memory regions. */ struct splay_tree_s mem_map; diff --git a/libgomp/libgomp_g.h b/libgomp/libgomp_g.h index 6229ca0..24eebb6 100644 --- a/libgomp/libgomp_g.h +++ b/libgomp/libgomp_g.h @@ -278,8 +278,7 @@ extern void GOMP_single_copy_end (void *); extern void GOMP_target (int, void (*) (void *), const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_target_ext (int, void (*) (void *), size_t, void **, size_t *, - unsigned short *, unsigned int, void **, - int, int); + unsigned short *, unsigned int, void **, void **); extern void GOMP_target_data (int, const void *, size_t, void **, size_t *, unsigned char *); extern void GOMP_target_data_ext (int, size_t, void **, size_t *, diff --git a/libgomp/oacc-host.c b/libgomp/oacc-host.c index 0760e44..1e760f6 100644 --- a/libgomp/oacc-host.c +++ b/libgomp/oacc-host.c @@ -123,7 +123,8 @@ host_host2dev (int n __attribute__ ((unused)), } static void -host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars) +host_run (int n __attribute__ ((unused)), void *fn_ptr, void *vars, 
+ void **args __attribute__((unused))) { void (*fn)(void *) = (void (*)(void *)) fn_ptr; diff --git a/libgomp/target.c b/libgomp/target.c index bea5822..f1f5849 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1329,44 +1329,90 @@ gomp_target_fallback (void (*fn) (void *), void **hostaddrs) *thr = old_thr; } -/* Host fallback with firstprivate map-type handling. */ +/* Calculate alignment and size requirements of a private copy of data shared + as GOMP_MAP_FIRSTPRIVATE and store them to TGT_ALIGN and TGT_SIZE. */ -static void -gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum, - void **hostaddrs, size_t *sizes, - unsigned short *kinds) +static inline void +calculate_firstprivate_requirements (size_t mapnum, size_t *sizes, + unsigned short *kinds, size_t *tgt_align, + size_t *tgt_size) { - size_t i, tgt_align = 0, tgt_size = 0; - char *tgt = NULL; + size_t i; + for (i = 0; i < mapnum; i++) + if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) + { + size_t align = (size_t) 1 << (kinds[i] >> 8); + if (*tgt_align < align) + *tgt_align = align; + *tgt_size = (*tgt_size + align - 1) & ~(align - 1); + *tgt_size += sizes[i]; + } +} + +/* Copy data shared as GOMP_MAP_FIRSTPRIVATE to TGT. */ + +static inline void +copy_firstprivate_data (char *tgt, size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds, size_t tgt_align, + size_t tgt_size) +{ + uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); + if (al) + tgt += tgt_align - al; + tgt_size = 0; + size_t i; for (i = 0; i < mapnum; i++) if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) { size_t align = (size_t) 1 << (kinds[i] >> 8); - if (tgt_align < align) - tgt_align = align; tgt_size = (tgt_size + align - 1) & ~(align - 1); - tgt_size += sizes[i]; + memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); + hostaddrs[i] = tgt + tgt_size; + tgt_size = tgt_size + sizes[i]; } +} + +/* Host fallback with firstprivate map-type handling. 
*/ + +static void +gomp_target_fallback_firstprivate (void (*fn) (void *), size_t mapnum, + void **hostaddrs, size_t *sizes, + unsigned short *kinds) +{ + size_t tgt_align = 0, tgt_size = 0; + calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, + &tgt_size); if (tgt_align) { - tgt = gomp_alloca (tgt_size + tgt_align - 1); - uintptr_t al = (uintptr_t) tgt & (tgt_align - 1); - if (al) - tgt += tgt_align - al; - tgt_size = 0; - for (i = 0; i < mapnum; i++) - if ((kinds[i] & 0xff) == GOMP_MAP_FIRSTPRIVATE) - { - size_t align = (size_t) 1 << (kinds[i] >> 8); - tgt_size = (tgt_size + align - 1) & ~(align - 1); - memcpy (tgt + tgt_size, hostaddrs[i], sizes[i]); - hostaddrs[i] = tgt + tgt_size; - tgt_size = tgt_size + sizes[i]; - } + char *tgt = gomp_alloca (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, + tgt_size); } gomp_target_fallback (fn, hostaddrs); } +/* Handle firstprivate map-type for shared memory devices and the host + fallback. Return the pointer of firstprivate copies which has to be freed + after use. */ + +static void * +gomp_target_unshare_firstprivate (size_t mapnum, void **hostaddrs, + size_t *sizes, unsigned short *kinds) +{ + size_t tgt_align = 0, tgt_size = 0; + char *tgt = NULL; + + calculate_firstprivate_requirements (mapnum, sizes, kinds, &tgt_align, + &tgt_size); + if (tgt_align) + { + tgt = gomp_malloc (tgt_size + tgt_align - 1); + copy_firstprivate_data (tgt, mapnum, hostaddrs, sizes, kinds, tgt_align, + tgt_size); + } + return tgt; +} + /* Helper function of GOMP_target{,_ext} routines. 
*/ static void * @@ -1390,7 +1436,12 @@ gomp_get_target_fn_addr (struct gomp_device_descr *devicep, splay_tree_key tgt_fn = splay_tree_lookup (&devicep->mem_map, &k); gomp_mutex_unlock (&devicep->lock); if (tgt_fn == NULL) - gomp_fatal ("Target function wasn't mapped"); + { + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + return NULL; + else + gomp_fatal ("Target function wasn't mapped"); + } return (void *) tgt_fn->tgt_offset; } @@ -1416,13 +1467,16 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, void *fn_addr; if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + /* All shared memory devices should use the GOMP_target_ext function. */ + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) return gomp_target_fallback (fn, hostaddrs); struct target_mem_desc *tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, false, GOMP_MAP_VARS_TARGET); - devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); + devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start, + NULL); gomp_unmap_vars (tgt_vars, true); } @@ -1430,6 +1484,15 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, and several arguments have been added: FLAGS is a bitmask, see GOMP_TARGET_FLAG_* in gomp-constants.h. DEPEND is array of dependencies, see GOMP_task for details. + + ARGS is a pointer to an array consisting of a variable number of both + device-independent and device-specific arguments, each of which takes one or + two elements, where the first specifies for which device it is intended, the + type and optionally also the value. If the value is not present in the + first one, the whole second element is the actual value. The last element + of the array is a single NULL. Among the device-independent arguments can + be, for example, NUM_TEAMS and THREAD_LIMIT. 
+ NUM_TEAMS is positive if GOMP_teams will be called in the body with that value, or 1 if teams construct is not present, or 0, if teams construct does not have num_teams clause and so the choice is @@ -1443,14 +1506,10 @@ GOMP_target (int device, void (*fn) (void *), const void *unused, void GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - unsigned int flags, void **depend, int num_teams, - int thread_limit) + unsigned int flags, void **depend, void **args) { struct gomp_device_descr *devicep = resolve_device (device); - (void) num_teams; - (void) thread_limit; - if (flags & GOMP_TARGET_FLAG_NOWAIT) { struct gomp_thread *thr = gomp_thread (); @@ -1487,7 +1546,7 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, && !thr->task->final_task) { gomp_create_target_task (devicep, fn, mapnum, hostaddrs, - sizes, kinds, flags, depend, + sizes, kinds, flags, depend, args, GOMP_TARGET_TASK_BEFORE_MAP); return; } @@ -1507,17 +1566,30 @@ GOMP_target_ext (int device, void (*fn) (void *), size_t mapnum, void *fn_addr; if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - || !(fn_addr = gomp_get_target_fn_addr (devicep, fn))) + || !(fn_addr = gomp_get_target_fn_addr (devicep, fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { gomp_target_fallback_firstprivate (fn, mapnum, hostaddrs, sizes, kinds); return; } - struct target_mem_desc *tgt_vars - = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, kinds, true, - GOMP_MAP_VARS_TARGET); - devicep->run_func (devicep->target_id, fn_addr, (void *) tgt_vars->tgt_start); - gomp_unmap_vars (tgt_vars, true); + struct target_mem_desc *tgt_vars; + void *fpc = NULL; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + fpc = gomp_target_unshare_firstprivate (mapnum, hostaddrs, sizes, kinds); + tgt_vars = NULL; + } + else + tgt_vars = gomp_map_vars (devicep, mapnum, hostaddrs, NULL, sizes, 
kinds, + true, GOMP_MAP_VARS_TARGET); + devicep->run_func (devicep->target_id, fn_addr, + tgt_vars ? (void *) tgt_vars->tgt_start : hostaddrs, + args); + if (tgt_vars) + gomp_unmap_vars (tgt_vars, true); + else + free (fpc); } /* Host fallback for GOMP_target_data{,_ext} routines. */ @@ -1547,7 +1619,8 @@ GOMP_target_data (int device, const void *unused, size_t mapnum, struct gomp_device_descr *devicep = resolve_device (device); if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)) return gomp_target_data_fallback (); struct target_mem_desc *tgt @@ -1565,7 +1638,8 @@ GOMP_target_data_ext (int device, size_t mapnum, void **hostaddrs, struct gomp_device_descr *devicep = resolve_device (device); if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return gomp_target_data_fallback (); struct target_mem_desc *tgt @@ -1595,7 +1669,8 @@ GOMP_target_update (int device, const void *unused, size_t mapnum, struct gomp_device_descr *devicep = resolve_device (device); if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; gomp_update (devicep, mapnum, hostaddrs, sizes, kinds, false); @@ -1626,7 +1701,7 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, mapnum, hostaddrs, sizes, kinds, flags | GOMP_TARGET_FLAG_UPDATE, - depend, GOMP_TARGET_TASK_DATA)) + depend, NULL, GOMP_TARGET_TASK_DATA)) return; } else @@ -1646,7 +1721,8 @@ GOMP_target_update_ext (int device, size_t mapnum, void **hostaddrs, } if (devicep == NULL - || !(devicep->capabilities & 
GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; struct gomp_thread *thr = gomp_thread (); @@ -1756,7 +1832,7 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, { if (gomp_create_target_task (devicep, (void (*) (void *)) NULL, mapnum, hostaddrs, sizes, kinds, - flags, depend, + flags, depend, NULL, GOMP_TARGET_TASK_DATA)) return; } @@ -1777,7 +1853,8 @@ GOMP_target_enter_exit_data (int device, size_t mapnum, void **hostaddrs, } if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return; struct gomp_thread *thr = gomp_thread (); @@ -1815,7 +1892,8 @@ gomp_target_task_fn (void *data) void *fn_addr; if (devicep == NULL || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) - || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn))) + || !(fn_addr = gomp_get_target_fn_addr (devicep, ttask->fn)) + || (devicep->can_run_func && !devicep->can_run_func (fn_addr))) { ttask->state = GOMP_TARGET_TASK_FALLBACK; gomp_target_fallback_firstprivate (ttask->fn, ttask->mapnum, @@ -1826,22 +1904,36 @@ gomp_target_task_fn (void *data) if (ttask->state == GOMP_TARGET_TASK_FINISHED) { - gomp_unmap_vars (ttask->tgt, true); + if (ttask->tgt) + gomp_unmap_vars (ttask->tgt, true); return false; } - ttask->tgt - = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, NULL, - ttask->sizes, ttask->kinds, true, - GOMP_MAP_VARS_TARGET); + void *actual_arguments; + if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) + { + ttask->tgt = NULL; + ttask->firstprivate_copies + = gomp_target_unshare_firstprivate (ttask->mapnum, ttask->hostaddrs, + ttask->sizes, ttask->kinds); + actual_arguments = ttask->hostaddrs; + } + else + { + ttask->tgt = gomp_map_vars (devicep, ttask->mapnum, ttask->hostaddrs, + NULL, 
ttask->sizes, ttask->kinds, true, + GOMP_MAP_VARS_TARGET); + actual_arguments = (void *) ttask->tgt->tgt_start; + } ttask->state = GOMP_TARGET_TASK_READY_TO_RUN; - devicep->async_run_func (devicep->target_id, fn_addr, - (void *) ttask->tgt->tgt_start, (void *) ttask); + devicep->async_run_func (devicep->target_id, fn_addr, actual_arguments, + ttask->args, (void *) ttask); return true; } else if (devicep == NULL - || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + || !(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return false; size_t i; @@ -1891,7 +1983,8 @@ omp_target_alloc (size_t size, int device_num) if (devicep == NULL) return NULL; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return malloc (size); gomp_mutex_lock (&devicep->lock); @@ -1919,7 +2012,8 @@ omp_target_free (void *device_ptr, int device_num) if (devicep == NULL) return; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { free (device_ptr); return; @@ -1946,7 +2040,8 @@ omp_target_is_present (void *ptr, int device_num) if (devicep == NULL) return 0; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return 1; gomp_mutex_lock (&devicep->lock); @@ -1976,7 +2071,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, if (dst_devicep == NULL) return EINVAL; - if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) dst_devicep = NULL; } if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) 
@@ -1988,7 +2084,8 @@ omp_target_memcpy (void *dst, void *src, size_t length, size_t dst_offset, if (src_devicep == NULL) return EINVAL; - if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) src_devicep = NULL; } if (src_devicep == NULL && dst_devicep == NULL) @@ -2118,7 +2215,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size, if (dst_devicep == NULL) return EINVAL; - if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(dst_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || dst_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) dst_devicep = NULL; } if (src_device_num != GOMP_DEVICE_HOST_FALLBACK) @@ -2130,7 +2228,8 @@ omp_target_memcpy_rect (void *dst, void *src, size_t element_size, if (src_devicep == NULL) return EINVAL; - if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(src_devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || src_devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) src_devicep = NULL; } @@ -2166,7 +2265,8 @@ omp_target_associate_ptr (void *host_ptr, void *device_ptr, size_t size, if (devicep == NULL) return EINVAL; - if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400)) + if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_OPENMP_400) + || devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) return EINVAL; gomp_mutex_lock (&devicep->lock); @@ -2309,6 +2409,7 @@ gomp_load_plugin_for_device (struct gomp_device_descr *device, { DLSYM (run); DLSYM (async_run); + DLSYM_OPT (can_run, can_run); DLSYM (dev2dev); } if (device->capabilities & GOMP_OFFLOAD_CAP_OPENACC_200) diff --git a/libgomp/task.c b/libgomp/task.c index de9936e..90bdb1c 100644 --- a/libgomp/task.c +++ b/libgomp/task.c @@ -581,6 +581,7 @@ GOMP_PLUGIN_target_task_completion (void *data) gomp_mutex_unlock (&team->task_lock); } ttask->state = GOMP_TARGET_TASK_FINISHED; + free 
(ttask->firstprivate_copies); gomp_target_task_completion (team, task); gomp_mutex_unlock (&team->task_lock); } @@ -593,7 +594,7 @@ bool gomp_create_target_task (struct gomp_device_descr *devicep, void (*fn) (void *), size_t mapnum, void **hostaddrs, size_t *sizes, unsigned short *kinds, - unsigned int flags, void **depend, + unsigned int flags, void **depend, void **args, enum gomp_target_task_state state) { struct gomp_thread *thr = gomp_thread (); @@ -653,6 +654,7 @@ gomp_create_target_task (struct gomp_device_descr *devicep, ttask->devicep = devicep; ttask->fn = fn; ttask->mapnum = mapnum; + ttask->args = args; memcpy (ttask->hostaddrs, hostaddrs, mapnum * sizeof (void *)); ttask->sizes = (size_t *) &ttask->hostaddrs[mapnum]; memcpy (ttask->sizes, sizes, mapnum * sizeof (size_t)); diff --git a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp index 68f7b2c..58ef595 100644 --- a/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp +++ b/liboffloadmic/plugin/libgomp-plugin-intelmic.cpp @@ -528,7 +528,7 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst_ptr, const void *src_ptr, extern "C" void GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, - void *async_data) + void **, void *async_data) { TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p, async_data = %p)", device, tgt_fn, tgt_vars, async_data); @@ -544,7 +544,7 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, } extern "C" void -GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars) +GOMP_OFFLOAD_run (int device, void *tgt_fn, void *tgt_vars, void **) { TRACE ("(device = %d, tgt_fn = %p, tgt_vars = %p)", device, tgt_fn, tgt_vars);