diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index ea327bf2ca0..6a9ff5cd93e 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -3226,7 +3226,11 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask) if (!init_hsa_context ()) return 0; /* Return -1 if no omp_requires_mask cannot be fulfilled but - devices were present. */ + devices were present. + Note: not all devices support USM, but the compiler refuses to create + binaries for those that don't anyway. */ + omp_requires_mask &= ~(GOMP_REQUIRES_UNIFIED_ADDRESS + | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY); if (hsa_context.agent_count > 0 && omp_requires_mask != 0) return -1; return hsa_context.agent_count; @@ -3810,6 +3814,89 @@ GOMP_OFFLOAD_async_run (int device, void *tgt_fn, void *tgt_vars, GOMP_PLUGIN_target_task_completion, async_data); } +/* Use a splay tree to track USM allocations. */ + +typedef struct usm_splay_tree_node_s *usm_splay_tree_node; +typedef struct usm_splay_tree_s *usm_splay_tree; +typedef struct usm_splay_tree_key_s *usm_splay_tree_key; + +struct usm_splay_tree_key_s { + void *addr; + size_t size; +}; + +static inline int +usm_splay_compare (usm_splay_tree_key x, usm_splay_tree_key y) +{ + if ((x->addr <= y->addr && x->addr + x->size > y->addr) + || (y->addr <= x->addr && y->addr + y->size > x->addr)) + return 0; + + return (x->addr > y->addr ? 1 : -1); +} + +#define splay_tree_prefix usm +#include "../splay-tree.h" + +static struct usm_splay_tree_s usm_map = { NULL }; + +/* Allocate memory suitable for Unified Shared Memory. + + In fact, AMD memory need only be "coarse grained", which target + allocations already are. We do need to track allocations so that + GOMP_OFFLOAD_is_usm_ptr can look them up. */ + +void * +GOMP_OFFLOAD_usm_alloc (int device, size_t size) +{ + void *ptr = GOMP_OFFLOAD_alloc (device, size); + + usm_splay_tree_node node = malloc (sizeof (struct usm_splay_tree_node_s)); + node->key.addr = ptr; + node->key.size = size; + node->left = NULL; + node->right = NULL; + usm_splay_tree_insert (&usm_map, node); + + return ptr; +} + +/* Free memory allocated via GOMP_OFFLOAD_usm_alloc. */ + +bool +GOMP_OFFLOAD_usm_free (int device, void *ptr) +{ + struct usm_splay_tree_key_s key = { ptr, 1 }; + usm_splay_tree_key node = usm_splay_tree_lookup (&usm_map, &key); + if (node) + { + usm_splay_tree_remove (&usm_map, &key); + free (node); + } + + return GOMP_OFFLOAD_free (device, ptr); +} + +/* True if the memory was allocated via GOMP_OFFLOAD_usm_alloc. */ + +bool +GOMP_OFFLOAD_is_usm_ptr (void *ptr) +{ + struct usm_splay_tree_key_s key = { ptr, 1 }; + return usm_splay_tree_lookup (&usm_map, &key); +} + +/* Indicate which GOMP_REQUIRES_* features are supported. */ + +bool +GOMP_OFFLOAD_supported_features (unsigned int *mask) +{ + *mask &= ~(GOMP_REQUIRES_UNIFIED_ADDRESS + | GOMP_REQUIRES_UNIFIED_SHARED_MEMORY); + + return (*mask == 0); +} + /* }}} */ /* {{{ OpenACC Plugin API */ @@ -4084,3 +4171,18 @@ GOMP_OFFLOAD_openacc_destroy_thread_data (void *data) } /* }}} */ +/* {{{ USM splay tree */ + +/* Include this now so that splay-tree.c doesn't include it later. This + avoids a conflict with splay_tree_prefix. */ +#include "libgomp.h" + +/* This allows splay-tree.c to call gomp_fatal in this context. The splay + tree code doesn't use the variadic arguments right now. */ +#define gomp_fatal(MSG, ...) GOMP_PLUGIN_fatal (MSG) + +/* Include the splay tree code inline, with the prefixes added. */ +#define splay_tree_prefix usm +#define splay_tree_c +#include "../splay-tree.h" +/* }}} */ diff --git a/libgomp/testsuite/lib/libgomp.exp b/libgomp/testsuite/lib/libgomp.exp index 891f90929d2..dce1af279e1 100644 --- a/libgomp/testsuite/lib/libgomp.exp +++ b/libgomp/testsuite/lib/libgomp.exp @@ -536,3 +536,25 @@ int main() { return 0; } } "-lcuda -lcudart" ] } + +# return 1 if OpenMP Unified Share Memory is supported + +proc check_effective_target_omp_usm { } { + if { [libgomp_check_effective_target_offload_target "nvptx"] } { + return 1 + } + + if { [libgomp_check_effective_target_offload_target "amdgcn"] } { + return [check_no_compiler_messages omp_usm executable { + #pragma omp requires unified_shared_memory + int main () { + #pragma omp target + ; + return 0; + } + }] + } + + return 0 +} + diff --git a/libgomp/testsuite/libgomp.c++/usm-1.C b/libgomp/testsuite/libgomp.c++/usm-1.C index fea25e5f10b..6e88f90d61f 100644 --- a/libgomp/testsuite/libgomp.c++/usm-1.C +++ b/libgomp/testsuite/libgomp.c++/usm-1.C @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */ +/* { dg-require-effective-target omp_usm } */ #include #pragma omp requires unified_shared_memory diff --git a/libgomp/testsuite/libgomp.c-c++-common/requires-1.c b/libgomp/testsuite/libgomp.c-c++-common/requires-1.c index fedf9779769..b760d5ebaf7 100644 --- a/libgomp/testsuite/libgomp.c-c++-common/requires-1.c +++ b/libgomp/testsuite/libgomp.c-c++-common/requires-1.c @@ -1,5 +1,6 @@ /* { dg-do link { target { offload_target_nvptx || offload_target_amdgcn } } } */ /* { dg-additional-sources requires-1-aux.c } */ +/* { dg-require-effective-target omp_usm } */ /* Check diagnostic by device-compiler's lto1. Other file uses: 'requires unified_address'. */ diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c index 1b35f19c45b..e73f1816f9a 100644 --- a/libgomp/testsuite/libgomp.c/usm-1.c +++ b/libgomp/testsuite/libgomp.c/usm-1.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-require-effective-target omp_usm } */ #include #include diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c index 689cee7e456..31f2bae7145 100644 --- a/libgomp/testsuite/libgomp.c/usm-2.c +++ b/libgomp/testsuite/libgomp.c/usm-2.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-require-effective-target omp_usm } */ #include #include diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c index 2ca66afe93f..2c78a0d8ced 100644 --- a/libgomp/testsuite/libgomp.c/usm-3.c +++ b/libgomp/testsuite/libgomp.c/usm-3.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-require-effective-target omp_usm } */ #include #include diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c index 753908c8440..1ac5498f73f 100644 --- a/libgomp/testsuite/libgomp.c/usm-4.c +++ b/libgomp/testsuite/libgomp.c/usm-4.c @@ -1,4 +1,5 @@ /* { dg-do run } */ +/* { dg-require-effective-target omp_usm } */ #include #include diff --git a/libgomp/testsuite/libgomp.c/usm-5.c b/libgomp/testsuite/libgomp.c/usm-5.c index 4d8b3cf71b1..563397f941a 100644 --- a/libgomp/testsuite/libgomp.c/usm-5.c +++ b/libgomp/testsuite/libgomp.c/usm-5.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-require-effective-target offload_device } */ +/* { dg-require-effective-target omp_usm } */ #include #include diff --git a/libgomp/testsuite/libgomp.c/usm-6.c b/libgomp/testsuite/libgomp.c/usm-6.c index c207140092a..bd14f8197b3 100644 --- a/libgomp/testsuite/libgomp.c/usm-6.c +++ b/libgomp/testsuite/libgomp.c/usm-6.c @@ -1,5 +1,5 @@ /* { dg-do run } */ -/* { dg-skip-if "Only valid for nvptx" { ! offload_target_nvptx } } */ +/* { dg-require-effective-target omp_usm } */ #include #include