From 9f9c986c3c991683610ec6145bb7361894714706 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Wed, 14 Dec 2022 22:07:38 +0100 Subject: [PATCH] libgomp: Handle OpenMP's reverse offloads, unlocking on error paths, pt. 1 The 'libgomp.fortran/reverse-offload-5.f90' test case added in recent commit r13-4593-gea4b23d9c82d9be3b982c3519fe5e8e9d833a6a8 "libgomp: Handle OpenMP's reverse offloads" for nvptx offloading runs into an error condition (thus, XFAILed) -- but then blocks until timeout, insted of terminating promptly: libgomp: cuMemcpyDtoHAsync error: invalid argument WARNING: program timed out. XFAIL: libgomp.fortran/reverse-offload-5.f90 -O execution test Attempt to fix that by not calling 'CUDA_CALL_ASSERT'/'GOMP_PLUGIN_fatal' with device lock held. That unfortunately doesn't resolve the issue (get another hang elsewhere, later on), but does seem like an improvement anyway? libgomp/ * plugin/plugin-nvptx.c (rev_off_dev_to_host_cpy) (rev_off_host_to_dev_cpy): Make 'static', 'bool'. 'CUDA_CALL_ERET' instead of 'CUDA_CALL_ASSERT'. * libgomp-plugin.c (GOMP_PLUGIN_target_rev): Adjust. * libgomp-plugin.h (GOMP_PLUGIN_target_rev): Likewise. * libgomp.h (gomp_target_rev): Likewise. * target.c (gomp_target_rev): Likewise. --- libgomp/libgomp-plugin.c | 4 +- libgomp/libgomp-plugin.h | 4 +- libgomp/libgomp.h | 4 +- libgomp/plugin/plugin-nvptx.c | 18 ++++++--- libgomp/target.c | 74 ++++++++++++++++++++++++++--------- 5 files changed, 73 insertions(+), 31 deletions(-) diff --git a/libgomp/libgomp-plugin.c b/libgomp/libgomp-plugin.c index 316de749f696..6a93cc9e829b 100644 --- a/libgomp/libgomp-plugin.c +++ b/libgomp/libgomp-plugin.c @@ -82,9 +82,9 @@ GOMP_PLUGIN_fatal (const char *msg, ...) void GOMP_PLUGIN_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num, - void (*dev_to_host_cpy) (void *, const void *, size_t, + bool (*dev_to_host_cpy) (void *, const void *, size_t, void *), - void (*host_to_dev_cpy) (void *, const void *, size_t, + bool (*host_to_dev_cpy) (void *, const void *, size_t, void *), void *token) { gomp_target_rev (fn_ptr, mapnum, devaddrs_ptr, sizes_ptr, kinds_ptr, dev_num, diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h index ac3878289506..fb533164bf9b 100644 --- a/libgomp/libgomp-plugin.h +++ b/libgomp/libgomp-plugin.h @@ -122,9 +122,9 @@ extern void GOMP_PLUGIN_fatal (const char *, ...) extern void GOMP_PLUGIN_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, int, - void (*) (void *, const void *, size_t, + bool (*) (void *, const void *, size_t, void *), - void (*) (void *, const void *, size_t, + bool (*) (void *, const void *, size_t, void *), void *); /* Prototypes for functions implemented by libgomp plugins. */ diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h index 8fc9379d1b3c..b7d1397cd49e 100644 --- a/libgomp/libgomp.h +++ b/libgomp/libgomp.h @@ -1130,8 +1130,8 @@ extern int gomp_get_num_devices (void); extern bool gomp_target_task_fn (void *); extern void gomp_target_rev (uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, int, - void (*) (void *, const void *, size_t, void *), - void (*) (void *, const void *, size_t, void *), + bool (*) (void *, const void *, size_t, void *), + bool (*) (void *, const void *, size_t, void *), void *); /* Splay tree definitions. */ diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index 5f8aed56c8b1..c62a4bbdccff 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -1,3 +1,5 @@ +#pragma GCC optimize "O0" + /* Plugin for NVPTX execution. Copyright (C) 2013-2022 Free Software Foundation, Inc. @@ -2060,20 +2062,24 @@ nvptx_stacks_acquire (struct ptx_device *ptx_dev, size_t size, int num) } -void +static bool rev_off_dev_to_host_cpy (void *dest, const void *src, size_t size, CUstream stream) { - CUDA_CALL_ASSERT (cuMemcpyDtoHAsync, dest, (CUdeviceptr) src, size, stream); - CUDA_CALL_ASSERT (cuStreamSynchronize, stream); + CUDA_CALL_ERET (false, + cuMemcpyDtoHAsync, dest, (CUdeviceptr) src, size, stream); + CUDA_CALL_ERET (false, cuStreamSynchronize, stream); + return true; } -void +static bool rev_off_host_to_dev_cpy (void *dest, const void *src, size_t size, CUstream stream) { - CUDA_CALL_ASSERT (cuMemcpyHtoDAsync, (CUdeviceptr) dest, src, size, stream); - CUDA_CALL_ASSERT (cuStreamSynchronize, stream); + CUDA_CALL_ERET (false, + cuMemcpyHtoDAsync, (CUdeviceptr) dest, src, size, stream); + CUDA_CALL_ERET (false, cuStreamSynchronize, stream); + return true; } void diff --git a/libgomp/target.c b/libgomp/target.c index afe1720b353d..e707129500f9 100644 --- a/libgomp/target.c +++ b/libgomp/target.c @@ -1,3 +1,5 @@ +#pragma GCC optimize "O0" + /* Copyright (C) 2013-2022 Free Software Foundation, Inc. Contributed by Jakub Jelinek . @@ -3294,8 +3296,8 @@ gomp_map_cdata_lookup (struct cpy_data *d, uint64_t *devaddrs, void gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, uint64_t sizes_ptr, uint64_t kinds_ptr, int dev_num, - void (*dev_to_host_cpy) (void *, const void *, size_t, void*), - void (*host_to_dev_cpy) (void *, const void *, size_t, void*), + bool (*dev_to_host_cpy) (void *, const void *, size_t, void*), + bool (*host_to_dev_cpy) (void *, const void *, size_t, void*), void *token) { /* Return early if there is no offload code. */ @@ -3340,12 +3342,21 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, kinds = (unsigned short *) gomp_malloc (mapnum * sizeof (unsigned short)); if (dev_to_host_cpy) { - dev_to_host_cpy (devaddrs, (const void *) (uintptr_t) devaddrs_ptr, - mapnum * sizeof (uint64_t), token); - dev_to_host_cpy (sizes, (const void *) (uintptr_t) sizes_ptr, - mapnum * sizeof (uint64_t), token); - dev_to_host_cpy (kinds, (const void *) (uintptr_t) kinds_ptr, - mapnum * sizeof (unsigned short), token); + bool ok = true; + ok = ok && dev_to_host_cpy (devaddrs, + (const void *) (uintptr_t) devaddrs_ptr, + mapnum * sizeof (uint64_t), token); + ok = ok && dev_to_host_cpy (sizes, + (const void *) (uintptr_t) sizes_ptr, + mapnum * sizeof (uint64_t), token); + ok = ok && dev_to_host_cpy (kinds, + (const void *) (uintptr_t) kinds_ptr, + mapnum * sizeof (unsigned short), token); + if (!ok) + { + /*TODO gomp_mutex_unlock (&devicep->lock); */ + exit (EXIT_FAILURE); + } } else { @@ -3385,8 +3396,15 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, memcpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i], (size_t) sizes[i]); else if (dev_to_host_cpy) - dev_to_host_cpy (tgt + tgt_size, (void *) (uintptr_t) devaddrs[i], - (size_t) sizes[i], token); + { + if (!dev_to_host_cpy (tgt + tgt_size, + (void *) (uintptr_t) devaddrs[i], + (size_t) sizes[i], token)) + { + /*TODO gomp_mutex_unlock (&devicep->lock); */ + exit (EXIT_FAILURE); + } + } else gomp_copy_dev2host (devicep, NULL, tgt + tgt_size, (void *) (uintptr_t) devaddrs[i], @@ -3481,9 +3499,15 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, || kind == GOMP_MAP_ALWAYS_TOFROM) { if (dev_to_host_cpy) - dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i], - (void *) (uintptr_t) cdata[i].devaddr, - sizes[i], token); + { + if (!dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i], + (void *) (uintptr_t) cdata[i].devaddr, + sizes[i], token)) + { + gomp_mutex_unlock (&devicep->lock); + exit (EXIT_FAILURE); + } + } else gomp_copy_dev2host (devicep, NULL, (void *) (uintptr_t) devaddrs[i], @@ -3556,9 +3580,15 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, = (uint64_t) (uintptr_t) gomp_aligned_alloc (align, sizes[i]); if (dev_to_host_cpy) - dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i], - (void *) (uintptr_t) cdata[i].devaddr, - sizes[i], token); + { + if (!dev_to_host_cpy ((void *) (uintptr_t) devaddrs[i], + (void *) (uintptr_t) cdata[i].devaddr, + sizes[i], token)) + { + gomp_mutex_unlock (&devicep->lock); + exit (EXIT_FAILURE); + } + } else gomp_copy_dev2host (devicep, NULL, (void *) (uintptr_t) devaddrs[i], @@ -3662,9 +3692,15 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr, case GOMP_MAP_FROM: case GOMP_MAP_TOFROM: if (copy && host_to_dev_cpy) - host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr, - (void *) (uintptr_t) devaddrs[i], - sizes[i], token); + { + if (!host_to_dev_cpy ((void *) (uintptr_t) cdata[i].devaddr, + (void *) (uintptr_t) devaddrs[i], + sizes[i], token)) + { + /*TODO gomp_mutex_unlock (&devicep->lock); */ + exit (EXIT_FAILURE); + } + } else if (copy) gomp_copy_host2dev (devicep, NULL, (void *) (uintptr_t) cdata[i].devaddr, -- 2.35.1