public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [OG12 commit] amdgcn, libgomp: USM allocation update
@ 2022-10-24 16:26 Andrew Stubbs
  2023-02-16 20:50 ` [og12] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation (was: [OG12 commit] amdgcn, libgomp: USM allocation update) Thomas Schwinge
  0 siblings, 1 reply; 2+ messages in thread
From: Andrew Stubbs @ 2022-10-24 16:26 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 541 bytes --]

I've committed this patch to the devel/omp/gcc-12 branch. I will have to 
fold it into my previous OpenMP memory management patch series when I 
repost it.

The patch changes the internal memory allocation method such that memory 
is allocated in the regular heap and then marked as "coarse-grained", as 
opposed to allocating coarse-grained memory in the first place. The 
difference is that this is CPU first, not GPU first, which is typically 
the right way around, especially when we are using this for all possible 
allocations.

Andrew

[-- Attachment #2: 221024-usm-allocator-update.patch --]
[-- Type: text/plain, Size: 7621 bytes --]

amdgcn, libgomp: USM allocation update

Allocate Unified Shared Memory via malloc and hsa_amd_svm_attributes_set,
instead of hsa_allocate_memory.  This scheme should be more efficient for
memory that is first accessed by the CPU.

libgomp/ChangeLog:

	* plugin/plugin-gcn.c (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED): New.
	(HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT): New.
	(HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG): New.
	(HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED): New.
	(hsa_amd_svm_attribute_pair_t): New.
	(struct hsa_runtime_fn_info): Add hsa_amd_svm_attributes_set_fn.
	(dump_hsa_system_info): Dump HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED and
	HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT.
	(DLSYM_OPT_FN): New.
	(init_hsa_runtime_functions): Add hsa_amd_svm_attributes_set.
	(GOMP_OFFLOAD_usm_alloc): Use malloc and hsa_amd_svm_attributes_set.
	(GOMP_OFFLOAD_usm_free): Use regular free.
	* testsuite/libgomp.c/usm-1.c: Add -mxnack=on for amdgcn.
	* testsuite/libgomp.c/usm-2.c: Likewise.
	* testsuite/libgomp.c/usm-3.c: Likewise.
	* testsuite/libgomp.c/usm-4.c: Likewise.

diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index dd493f63912..4871a6a793b 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -113,6 +113,16 @@ struct gcn_thread
   int async;
 };
 
+/* TEMPORARY IMPORT, UNTIL hsa_ext_amd.h GETS UPDATED.  */
+const static int HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED = 0x201;
+const static int HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT = 0x202;
+const static int HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG = 0;
+const static int HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED = 1;
+typedef struct hsa_amd_svm_attribute_pair_s {
+  uint64_t attribute;
+  uint64_t value;
+} hsa_amd_svm_attribute_pair_t;
+
 /* As an HSA runtime is dlopened, following structure defines function
    pointers utilized by the HSA plug-in.  */
 
@@ -195,6 +205,9 @@ struct hsa_runtime_fn_info
   hsa_status_t (*hsa_code_object_deserialize_fn)
     (void *serialized_code_object, size_t serialized_code_object_size,
      const char *options, hsa_code_object_t *code_object);
+  hsa_status_t (*hsa_amd_svm_attributes_set_fn)
+    (void* ptr, size_t size, hsa_amd_svm_attribute_pair_t* attribute_list,
+     size_t attribute_count);
 };
 
 /* Structure describing the run-time and grid properties of an HSA kernel
@@ -720,6 +733,24 @@ dump_hsa_system_info (void)
     }
   else
     GCN_WARNING ("HSA_SYSTEM_INFO_EXTENSIONS: FAILED\n");
+
+  bool svm_supported;
+  status = hsa_fns.hsa_system_get_info_fn
+    (HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED, &svm_supported);
+  if (status == HSA_STATUS_SUCCESS)
+    GCN_DEBUG ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: %s\n",
+	       (svm_supported ? "TRUE" : "FALSE"));
+  else
+    GCN_WARNING ("HSA_AMD_SYSTEM_INFO_SVM_SUPPORTED: FAILED\n");
+
+  bool svm_accessible;
+  status = hsa_fns.hsa_system_get_info_fn
+    (HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT, &svm_accessible);
+  if (status == HSA_STATUS_SUCCESS)
+    GCN_DEBUG ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: %s\n",
+	       (svm_accessible ? "TRUE" : "FALSE"));
+  else
+    GCN_WARNING ("HSA_SYSTEM_INFO_SVM_ACCESSIBLE_BY_DEFAULT: FAILED\n");
 }
 
 /* Dump information about the available hardware.  */
@@ -1361,6 +1392,8 @@ init_hsa_runtime_functions (void)
   hsa_fns.function##_fn = dlsym (handle, #function); \
   if (hsa_fns.function##_fn == NULL) \
     return false;
+#define DLSYM_OPT_FN(function) \
+  hsa_fns.function##_fn = dlsym (handle, #function);
   void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY);
   if (handle == NULL)
     return false;
@@ -1395,6 +1428,7 @@ init_hsa_runtime_functions (void)
   DLSYM_FN (hsa_signal_load_acquire)
   DLSYM_FN (hsa_queue_destroy)
   DLSYM_FN (hsa_code_object_deserialize)
+  DLSYM_OPT_FN (hsa_amd_svm_attributes_set)
   return true;
 #undef DLSYM_FN
 }
@@ -3886,15 +3920,38 @@ static struct usm_splay_tree_s usm_map = { NULL };
 
 /* Allocate memory suitable for Unified Shared Memory.
 
-   In fact, AMD memory need only be "coarse grained", which target
-   allocations already are.  We do need to track allocations so that
-   GOMP_OFFLOAD_is_usm_ptr can look them up.  */
+   Normal heap memory is already enabled for USM, but by default it is "fine-
+   grained" memory, meaning that the GPU must access it via the system bus,
+   slowly.  Changing the page to "coarse-grained" mode means that the page
+   is migrated on-demand and can therefore be accessed quickly by both CPU and
+   GPU (although care should be taken to prevent thrashing the page back and
+   forth).
+
+   GOMP_OFFLOAD_alloc also allocates coarse-grained memory, but in that case
+   the initial location is GPU memory; this function returns system memory.
+
+   We record and track allocations so that GOMP_OFFLOAD_is_usm_ptr can look
+   them up.  */
 
 void *
 GOMP_OFFLOAD_usm_alloc (int device, size_t size)
 {
-  void *ptr = GOMP_OFFLOAD_alloc (device, size);
+  void *ptr = malloc (size);
+  if (!ptr || !hsa_fns.hsa_amd_svm_attributes_set_fn)
+    return ptr;
+
+  /* Register the heap allocation as coarse grained, which implies USM.  */
+  struct hsa_amd_svm_attribute_pair_s attr = {
+    HSA_AMD_SVM_ATTRIB_GLOBAL_FLAG,
+    HSA_AMD_SVM_GLOBAL_FLAG_COARSE_GRAINED
+  };
+  hsa_status_t status = hsa_fns.hsa_amd_svm_attributes_set_fn (ptr, size,
+							       &attr, 1);
+  if (status != HSA_STATUS_SUCCESS)
+    GOMP_PLUGIN_fatal ("Failed to allocate Unified Shared Memory;"
+		       " please update your drivers and/or kernel");
 
+  /* Record the allocation for GOMP_OFFLOAD_is_usm_ptr.  */
   usm_splay_tree_node node = malloc (sizeof (struct usm_splay_tree_node_s));
   node->key.addr = ptr;
   node->key.size = size;
@@ -3918,7 +3975,8 @@ GOMP_OFFLOAD_usm_free (int device, void *ptr)
       free (node);
     }
 
-  return GOMP_OFFLOAD_free (device, ptr);
+  free (ptr);
+  return true;
 }
 
 /* True if the memory was allocated via GOMP_OFFLOAD_usm_alloc.  */
diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c
index e73f1816f9a..f7bf897b839 100644
--- a/libgomp/testsuite/libgomp.c/usm-1.c
+++ b/libgomp/testsuite/libgomp.c/usm-1.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c
index 31f2bae7145..3f52adbd7e1 100644
--- a/libgomp/testsuite/libgomp.c/usm-2.c
+++ b/libgomp/testsuite/libgomp.c/usm-2.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c
index 2c78a0d8ced..225cba5fe58 100644
--- a/libgomp/testsuite/libgomp.c/usm-3.c
+++ b/libgomp/testsuite/libgomp.c/usm-3.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c
index 1ac5498f73f..d4addfc587a 100644
--- a/libgomp/testsuite/libgomp.c/usm-4.c
+++ b/libgomp/testsuite/libgomp.c/usm-4.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
+/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>

^ permalink raw reply	[flat|nested] 2+ messages in thread

* [og12] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation (was: [OG12 commit] amdgcn, libgomp: USM allocation update)
  2022-10-24 16:26 [OG12 commit] amdgcn, libgomp: USM allocation update Andrew Stubbs
@ 2023-02-16 20:50 ` Thomas Schwinge
  0 siblings, 0 replies; 2+ messages in thread
From: Thomas Schwinge @ 2023-02-16 20:50 UTC (permalink / raw)
  To: Andrew Stubbs, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1176 bytes --]

Hi!

On 2022-10-24T17:26:44+0100, Andrew Stubbs <ams@codesourcery.com> wrote:
> I've committed this patch to the devel/omp/gcc-12 branch.

> --- a/libgomp/testsuite/libgomp.c/usm-1.c
> +++ b/libgomp/testsuite/libgomp.c/usm-1.c

> --- a/libgomp/testsuite/libgomp.c/usm-2.c
> +++ b/libgomp/testsuite/libgomp.c/usm-2.c

> --- a/libgomp/testsuite/libgomp.c/usm-3.c
> +++ b/libgomp/testsuite/libgomp.c/usm-3.c

> --- a/libgomp/testsuite/libgomp.c/usm-4.c
> +++ b/libgomp/testsuite/libgomp.c/usm-4.c

> @@ -1,5 +1,6 @@
>  /* { dg-do run } */
>  /* { dg-require-effective-target omp_usm } */
> +/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */

I've pushed to devel/omp/gcc-12 branch
commit b4d4603df3fed290ccf721899be6bc69f037fe2b
"'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation",
see attached.


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-libgomp.c-usm-1-2-3-4-.c-Re-enable-non-GCN-offloadin.patch --]
[-- Type: text/x-diff, Size: 3661 bytes --]

From b4d4603df3fed290ccf721899be6bc69f037fe2b Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Tue, 14 Feb 2023 18:57:04 +0100
Subject: [PATCH] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading
 compilation

Change '-foffload=amdgcn-amdhsa=[...]' to
'-foffload-options=amdgcn-amdhsa=[...]', so that non-GCN offloading compilation
doesn't get disabled.

Fix-up for og12 commit 6ec2c29dbbc19e7d2a8f991a5848e10c65c7c74c
"amdgcn, libgomp: USM allocation update".

	libgomp/
	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
	compilation.
	* testsuite/libgomp.c/usm-2.c: Likewise.
	* testsuite/libgomp.c/usm-3.c: Likewise.
	* testsuite/libgomp.c/usm-4.c: Likewise.
---
 libgomp/ChangeLog.omp               | 8 ++++++++
 libgomp/testsuite/libgomp.c/usm-1.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-2.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-3.c | 2 +-
 libgomp/testsuite/libgomp.c/usm-4.c | 2 +-
 5 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index 2a20516cd09..ecc14b4f537 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,11 @@
+2023-02-16  Thomas Schwinge  <thomas@codesourcery.com>
+
+	* testsuite/libgomp.c/usm-1.c: Re-enable non-GCN offloading
+	compilation.
+	* testsuite/libgomp.c/usm-2.c: Likewise.
+	* testsuite/libgomp.c/usm-3.c: Likewise.
+	* testsuite/libgomp.c/usm-4.c: Likewise.
+
 2023-02-16  Tobias Burnus  <tobias@codesourcery.com>
 
 	Backported from master:
diff --git a/libgomp/testsuite/libgomp.c/usm-1.c b/libgomp/testsuite/libgomp.c/usm-1.c
index f7bf897b839..35f37de7542 100644
--- a/libgomp/testsuite/libgomp.c/usm-1.c
+++ b/libgomp/testsuite/libgomp.c/usm-1.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-2.c b/libgomp/testsuite/libgomp.c/usm-2.c
index 3f52adbd7e1..783075edb54 100644
--- a/libgomp/testsuite/libgomp.c/usm-2.c
+++ b/libgomp/testsuite/libgomp.c/usm-2.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-3.c b/libgomp/testsuite/libgomp.c/usm-3.c
index 225cba5fe58..733f0f34090 100644
--- a/libgomp/testsuite/libgomp.c/usm-3.c
+++ b/libgomp/testsuite/libgomp.c/usm-3.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
diff --git a/libgomp/testsuite/libgomp.c/usm-4.c b/libgomp/testsuite/libgomp.c/usm-4.c
index d4addfc587a..5bf99df3b24 100644
--- a/libgomp/testsuite/libgomp.c/usm-4.c
+++ b/libgomp/testsuite/libgomp.c/usm-4.c
@@ -1,6 +1,6 @@
 /* { dg-do run } */
 /* { dg-require-effective-target omp_usm } */
-/* { dg-options "-foffload=amdgcn-amdhsa=-mxnack=on" { target offload_target_amdgcn } } */
+/* { dg-additional-options -foffload-options=amdgcn-amdhsa=-mxnack=on { target offload_target_amdgcn } } */
 
 #include <omp.h>
 #include <stdint.h>
-- 
2.25.1


^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2023-02-16 20:50 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-24 16:26 [OG12 commit] amdgcn, libgomp: USM allocation update Andrew Stubbs
2023-02-16 20:50 ` [og12] 'libgomp.c/usm-{1,2,3,4}.c': Re-enable non-GCN offloading compilation (was: [OG12 commit] amdgcn, libgomp: USM allocation update) Thomas Schwinge

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).