public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] amdgcn, libgomp: USM allocation update
@ 2022-10-24 16:19 Andrew Stubbs
0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2022-10-24 16:19 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:6ec2c29dbbc19e7d2a8f991a5848e10c65c7c74c
commit 6ec2c29dbbc19e7d2a8f991a5848e10c65c7c74c
Author: Andrew Stubbs <ams@codesourcery.com>
Date: Sat Oct 15 23:38:50 2022 +0100
amdgcn, libgomp: USM allocation update
Allocate Unified Shared Memory via malloc and hsa_amd_svm_attributes_set,
instead of hsa_allocate_memory. This scheme should be more efficient for
memory that is first accessed by the CPU.
libgomp/ChangeLog:
* plugin/plugin-gcn.c (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED): New.
(HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): New.
(HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG): New.
(HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED): New.
(hsa_amd_svm_attribute_pair_t): New.
(struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_set_fn.
(dump_hsa_system_info): Dump HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED and
HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT.
(DLSYM_OPT_FN): New.
(init_hsa_runtime_functions): Add hsa_amd_svm_attributes_set.
(GOMP_OFFLOAD_usm_alloc): Use malloc and hsa_amd_svm_attributes_set.
(GOMP_OFFLOAD_usm_free): Use regular free.
* testsuite/libgomp.c/usm-1.c: Add -mxnack=on for amdgcn.
* testsuite/libgomp.c/usm-2.c: Likewise.
* testsuite/libgomp.c/usm-3.c: Likewise.
* testsuite/libgomp.c/usm-4.c: Likewise.
Diff:
---
gcc/ChangeLog.omp | 19 +++++++++++
libgomp/plugin/plugin-gcn.c | 68 ++++++++++++++++++++++++++++++++++---
libgomp/testsuite/libgomp.c/usm-1.c | 1 +
libgomp/testsuite/libgomp.c/usm-2.c | 1 +
libgomp/testsuite/libgomp.c/usm-3.c | 1 +
libgomp/testsuite/libgomp.c/usm-4.c | 1 +
6 files changed, 86 insertions(+), 5 deletions(-)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 8fc8e06e9ff..1e05594a4c6 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,22 @@
+2022-10-24 Andrew Stubbs <ams@codesourcery.com>
+
+ * plugin/plugin-gcn.c (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED): New.
+ (HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): New.
+ (HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG): New.
+ (HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED): New.
+ (hsa_amd_svm_attribute_pair_t): New.
+ (struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_set_fn.
+ (dump_hsa_system_info): Dump HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED and
+ HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT.
+ (DLSYM_OPT_FN): New.
+ (init_hsa_runtime_functions): Add hsa_amd_svm_attributes_set.
+ (GOMP_OFFLOAD_usm_alloc): Use malloc and hsa_amd_svm_attributes_set.
+ (GOMP_OFFLOAD_usm_free): Use regular free.
+ * testsuite/libgomp.c/usm-1.c: Add -mxnack=on for amdgcn.
+ * testsuite/libgomp.c/usm-2.c: Likewise.
+ * testsuite/libgomp.c/usm-3.c: Likewise.
+ * testsuite/libgomp.c/usm-4.c: Likewise.
+
2022-10-24 Tobias Burnus <tobias@codesourcery.com>
Backported from master:
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index dd493f63912..4871a6a793b 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -113,6 +113,16 @@ struct gcn_thread
int async;
};
+/* TEMPORARY IMPORT, UNTIL hsa_ext_amd.h GETS UPDATED. */
+const static int HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201;
+const static int HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202;
+const static int HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0;
+const static int HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1;
+typedef struct hsa_amd_svm_attribute_pair_s {
+ uint64_t attribute;
+ uint64_t value;
+} hsa_amd_svm_attribute_pair_t;
+
/* As an HSA runtime is dlopened, following structure defines function
pointers utilized by the HSA plug-in. */
@@ -195,6 +205,9 @@ struct hsa_runtime_fn_info
hsa_status_t (*hsa_code_object_deserialize_fn)
(void *serialized_code_object, size_t serialized_code_object_size,
const char *options, hsa_code_object_t *code_object);
+ hsa_status_t (*hsa_amd_svm_attributes_set_fn)
+ (void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
+ size_t attribute_count);
};
/* Structure describing the run-time and grid properties of an HSA kernel
@@ -720,6 +733,24 @@ dump_hsa_system_info (void)
}
else
GCN_WARNING ("HSA_SYSTEM_INFO_EXTENSIONS: FAILED\n");
+
+ bool svm_supported;
+ status = hsa_fns.hsa_system_get_info_fn
+ (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED, &svm_supported);
+ if (status == HSA_STATUS_SUCCESS)
+ GCN_DEBUG ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: %s\n",
+ (svm_supported ? "TRUE" : "FALSE"));
+ else
+ GCN_WARNING ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: FAILED\n");
+
+ bool svm_accessible;
+ status = hsa_fns.hsa_system_get_info_fn
+ (HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT, &svm_accessible);
+ if (status == HSA_STATUS_SUCCESS)
+ GCN_DEBUG ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: %s\n",
+ (svm_accessible ? "TRUE" : "FALSE"));
+ else
+ GCN_WARNING ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: FAILED\n");
}
/* Dump information about the available hardware. */
@@ -1361,6 +1392,8 @@ init_hsa_runtime_functions (void)
hsa_fns.function##_fn = dlsym (handle, #function); \
if (hsa_fns.function##_fn == NULL) \
return false;
+#define DLSYM_OPT_FN(function) \
+ hsa_fns.function##_fn = dlsym (handle, #function);
void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
if (handle == NULL)
return false;
@@ -1395,6 +1428,7 @@ init_hsa_runtime_functions (void)
DLSYM_FN (hsa_signal_load_acquire)
DLSYM_FN (hsa_queue_destroy)
DLSYM_FN (hsa_code_object_deserialize)
+ DLSYM_OPT_FN (hsa_amd_svm_attributes_set)
return true;
#undef DLSYM_FN
}
@@ -3886,15 +3920,38 @@ static struct usm_splay_tree_s usm_map = { NULL };
/* Allocate memory suitable for Unified Shared Memory.
- In fact, AMD memory need only be "coarse grained", which target
- allocations already are. We do need to track allocations so that
- GOMP_OFFLOAD_is_usm_ptr can look them up. */
+ Normal heap memory is already enabled for USM, but by default it is "fine-
+ grained" memory, meaning that the GPU must access it via the system bus,
+ slowly. Changing the page to "coarse-grained" mode means that the page
+ is migrated on-demand and can therefore be accessed quickly by both CPU and
+ GPU (although care should be taken to prevent thrashing the page back and
+ forth).
+
+ GOMP_OFFLOAD_alloc also allocates coarse-grained memory, but in that case
+ the initial location is GPU memory; this function returns system memory.
+
+ We record and track allocations so that GOMP_OFFLOAD_is_usm_ptr can look
+ them up. */
void *
GOMP_OFFLOAD_usm_alloc (int device, size_t size)
{
- void *ptr = GOMP_OFFLOAD_alloc (device, size);
+ void *ptr = malloc (size);
+ if (!ptr || !hsa_fns.hsa_amd_svm_attributes_set_fn)
+ return ptr;
+
+ /* Register the heap allocation as coarse grained, which implies USM. */
+ struct hsa_amd_svm_attribute_pair_s attr = {
+ HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG,
+ HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED
+ };
+ hsa_status_t status = hsa_fns.hsa_amd_svm_attributes_set_fn (ptr, size,
+ &attr, 1);
+ if (status != HSA_STATUS_SUCCESS)
+ GOMP_PLUGIN_fatal ("Failed to allocate Unified Shared Memory;"
+ " please update your drivers and/or kernel");
+ /* Record the allocation for GOMP_OFFLOAD_is_usm_ptr. */
usm_splay_tree_node node = malloc (sizeof (struct usm_splay_tree_node_s));
node->key.addr = ptr;
node->key.size = size;
@@ -3918,7 +3975,8 @@ GOMP_OFFLOAD_usm_free (int device, void *ptr)
free (node);
}
- return GOMP_OFFLOAD_free (device, ptr);
+ free (ptr);
+ return true;
}
/* True if the memory was allocated via GOMP_OFFLOAD_usm_alloc. */
diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c
index e73f1816f9a..f7bf897b839 100644
--- a/libgomp/testsuite/libgomp.c/usm-1.c
+++ b/libgomp/testsuite/libgomp.c/usm-1.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
#include <omp.h>
#include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c
index 31f2bae7145..3f52adbd7e1 100644
--- a/libgomp/testsuite/libgomp.c/usm-2.c
+++ b/libgomp/testsuite/libgomp.c/usm-2.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
#include <omp.h>
#include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c
index 2c78a0d8ced..225cba5fe58 100644
--- a/libgomp/testsuite/libgomp.c/usm-3.c
+++ b/libgomp/testsuite/libgomp.c/usm-3.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
#include <omp.h>
#include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c
index 1ac5498f73f..d4addfc587a 100644
--- a/libgomp/testsuite/libgomp.c/usm-4.c
+++ b/libgomp/testsuite/libgomp.c/usm-4.c
@@ -1,5 +1,6 @@
/* { dg-do run } */
/* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
#include <omp.h>
#include <stdint.h>
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-10-24 16:19 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-24 16:19 [gcc/devel/omp/gcc-12] amdgcn, libgomp: USM allocation update Andrew Stubbs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).