From: Julian Brown <julian@codesourcery.com>
To: <gcc-patches@gcc.gnu.org>
Cc: <cltang@codesourcery.com>, <Catherine_Moore@mentor.com>,
<jakub@redhat.com>
Subject: [PATCH 1/6] [og8] Host-to-device transfer coalescing & magic offset value self-documentation
Date: Tue, 20 Nov 2018 21:55:00 -0000
Message-ID: <b8aad8249d908a9b759bb7ce111e1b6ffaf33291.1542748807.git.julian@codesourcery.com>
In-Reply-To: <cover.1542748807.git.julian@codesourcery.com>
[-- Attachment #1: Type: text/plain, Size: 749 bytes --]
Previously posted upstream:
https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00825.html
libgomp/
* libgomp.h (OFFSET_INLINED, OFFSET_POINTER, OFFSET_STRUCT): Define.
* target.c (FIELD_TGT_EMPTY): Define.
(gomp_coalesce_chunk): New.
(gomp_coalesce_buf): Use above instead of flat array of size_t pairs.
(gomp_coalesce_buf_add): Adjust for above change.
(gomp_copy_host2dev): Likewise.
(gomp_map_val): Use OFFSET_* macros instead of magic constants. Write
as switch instead of list of ifs.
(gomp_map_vars_async): Adjust for gomp_coalesce_chunk change. Use
OFFSET_* macros.
---
libgomp/libgomp.h | 5 +++
libgomp/target.c | 101 +++++++++++++++++++++++++++++++---------------------
2 files changed, 65 insertions(+), 41 deletions(-)
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-og8-Host-to-device-transfer-coalescing-magic-offset-.patch --]
[-- Type: text/x-patch; name="0001-og8-Host-to-device-transfer-coalescing-magic-offset-.patch", Size: 9484 bytes --]
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 607f4c2..acf7f8f 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -842,6 +842,11 @@ struct target_mem_desc {
artificial pointer to "omp declare target link" object. */
#define REFCOUNT_LINK (~(uintptr_t) 1)
+/* Special offset values. */
+#define OFFSET_INLINED (~(uintptr_t) 0)
+#define OFFSET_POINTER (~(uintptr_t) 1)
+#define OFFSET_STRUCT (~(uintptr_t) 2)
+
struct splay_tree_key_s {
/* Address of the host object. */
uintptr_t host_start;
diff --git a/libgomp/target.c b/libgomp/target.c
index ab17650..7220ac6 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -45,6 +45,8 @@
#include "plugin-suffix.h"
#endif
+#define FIELD_TGT_EMPTY (~(size_t) 0)
+
static void gomp_target_init (void);
/* The whole initialization code for offloading plugins is only run one. */
@@ -206,8 +208,14 @@ goacc_device_copy_async (struct gomp_device_descr *devicep,
}
}
-/* Infrastructure for coalescing adjacent or nearly adjacent (in device addresses)
- host to device memory transfers. */
+/* Infrastructure for coalescing adjacent or nearly adjacent (in device
+ addresses) host to device memory transfers. */
+
+/* One contiguous range that is gathered into gomp_coalesce_buf's BUF and
+ later copied to the device in a single transfer. */
+struct gomp_coalesce_chunk
+{
+ /* Offsets relative to tgt->tgt_start. START is the first byte of the
+ chunk; END is one past its last byte (set as start + len). */
+ size_t start, end;
+};
struct gomp_coalesce_buf
{
@@ -215,10 +223,10 @@ struct gomp_coalesce_buf
it will be copied to the device. */
void *buf;
struct target_mem_desc *tgt;
- /* Array with offsets, chunks[2 * i] is the starting offset and
- chunks[2 * i + 1] ending offset relative to tgt->tgt_start device address
+ /* Array with offsets, chunks[i].start is the starting offset and
+ chunks[i].end ending offset relative to tgt->tgt_start device address
of chunks which are to be copied to buf and later copied to device. */
- size_t *chunks;
+ struct gomp_coalesce_chunk *chunks;
/* Number of chunks in chunks array, or -1 if coalesce buffering should not
be performed. */
long chunk_cnt;
@@ -251,14 +259,14 @@ gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
{
if (cbuf->chunk_cnt < 0)
return;
- if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
+ if (start < cbuf->chunks[cbuf->chunk_cnt-1].end)
{
cbuf->chunk_cnt = -1;
return;
}
- if (start < cbuf->chunks[2 * cbuf->chunk_cnt - 1] + MAX_COALESCE_BUF_GAP)
+ if (start < cbuf->chunks[cbuf->chunk_cnt-1].end + MAX_COALESCE_BUF_GAP)
{
- cbuf->chunks[2 * cbuf->chunk_cnt - 1] = start + len;
+ cbuf->chunks[cbuf->chunk_cnt-1].end = start + len;
cbuf->use_cnt++;
return;
}
@@ -268,8 +276,8 @@ gomp_coalesce_buf_add (struct gomp_coalesce_buf *cbuf, size_t start, size_t len)
if (cbuf->use_cnt == 1)
cbuf->chunk_cnt--;
}
- cbuf->chunks[2 * cbuf->chunk_cnt] = start;
- cbuf->chunks[2 * cbuf->chunk_cnt + 1] = start + len;
+ cbuf->chunks[cbuf->chunk_cnt].start = start;
+ cbuf->chunks[cbuf->chunk_cnt].end = start + len;
cbuf->chunk_cnt++;
cbuf->use_cnt = 1;
}
@@ -301,20 +309,20 @@ gomp_copy_host2dev (struct gomp_device_descr *devicep,
if (cbuf)
{
uintptr_t doff = (uintptr_t) d - cbuf->tgt->tgt_start;
- if (doff < cbuf->chunks[2 * cbuf->chunk_cnt - 1])
+ if (doff < cbuf->chunks[cbuf->chunk_cnt-1].end)
{
long first = 0;
long last = cbuf->chunk_cnt - 1;
while (first <= last)
{
long middle = (first + last) >> 1;
- if (cbuf->chunks[2 * middle + 1] <= doff)
+ if (cbuf->chunks[middle].end <= doff)
first = middle + 1;
- else if (cbuf->chunks[2 * middle] <= doff)
+ else if (cbuf->chunks[middle].start <= doff)
{
- if (doff + sz > cbuf->chunks[2 * middle + 1])
+ if (doff + sz > cbuf->chunks[middle].end)
gomp_fatal ("internal libgomp cbuf error");
- memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0]),
+ memcpy ((char *) cbuf->buf + (doff - cbuf->chunks[0].start),
h, sz);
return;
}
@@ -538,17 +546,25 @@ gomp_map_val (struct target_mem_desc *tgt, void **hostaddrs, size_t i)
return tgt->list[i].key->tgt->tgt_start
+ tgt->list[i].key->tgt_offset
+ tgt->list[i].offset;
- if (tgt->list[i].offset == ~(uintptr_t) 0)
- return (uintptr_t) hostaddrs[i];
- if (tgt->list[i].offset == ~(uintptr_t) 1)
- return 0;
- if (tgt->list[i].offset == ~(uintptr_t) 2)
- return tgt->list[i + 1].key->tgt->tgt_start
- + tgt->list[i + 1].key->tgt_offset
- + tgt->list[i + 1].offset
- + (uintptr_t) hostaddrs[i]
- - (uintptr_t) hostaddrs[i + 1];
- return tgt->tgt_start + tgt->list[i].offset;
+
+ switch (tgt->list[i].offset)
+ {
+ case OFFSET_INLINED:
+ return (uintptr_t) hostaddrs[i];
+
+ case OFFSET_POINTER:
+ return 0;
+
+ case OFFSET_STRUCT:
+ return tgt->list[i + 1].key->tgt->tgt_start
+ + tgt->list[i + 1].key->tgt_offset
+ + tgt->list[i + 1].offset
+ + (uintptr_t) hostaddrs[i]
+ - (uintptr_t) hostaddrs[i + 1];
+
+ default:
+ return tgt->tgt_start + tgt->list[i].offset;
+ }
}
/* Dynamic array related data structures, interfaces with the compiler. */
@@ -758,8 +774,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
cbuf.buf = NULL;
if (mapnum > 1 || pragma_kind == GOMP_MAP_VARS_TARGET)
{
- cbuf.chunks
- = (size_t *) gomp_alloca ((2 * mapnum + 2) * sizeof (size_t));
+ size_t chunk_size = (mapnum + 1) * sizeof (struct gomp_coalesce_chunk);
+ cbuf.chunks = (struct gomp_coalesce_chunk *) gomp_alloca (chunk_size);
cbuf.chunk_cnt = 0;
}
if (pragma_kind == GOMP_MAP_VARS_TARGET)
@@ -769,8 +785,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
tgt_size = mapnum * sizeof (void *);
cbuf.chunk_cnt = 1;
cbuf.use_cnt = 1 + (mapnum > 1);
- cbuf.chunks[0] = 0;
- cbuf.chunks[1] = tgt_size;
+ cbuf.chunks[0].start = 0;
+ cbuf.chunks[0].end = tgt_size;
}
gomp_mutex_lock (&devicep->lock);
@@ -788,7 +804,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
|| (kind & typemask) == GOMP_MAP_FIRSTPRIVATE_INT)
{
tgt->list[i].key = NULL;
- tgt->list[i].offset = ~(uintptr_t) 0;
+ tgt->list[i].offset = OFFSET_INLINED;
continue;
}
else if ((kind & typemask) == GOMP_MAP_USE_DEVICE_PTR)
@@ -806,7 +822,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
= (void *) (n->tgt->tgt_start + n->tgt_offset
+ cur_node.host_start);
tgt->list[i].key = NULL;
- tgt->list[i].offset = ~(uintptr_t) 0;
+ tgt->list[i].offset = OFFSET_INLINED;
continue;
}
else if ((kind & typemask) == GOMP_MAP_STRUCT)
@@ -817,7 +833,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
cur_node.host_end = (uintptr_t) hostaddrs[last]
+ sizes[last];
tgt->list[i].key = NULL;
- tgt->list[i].offset = ~(uintptr_t) 2;
+ tgt->list[i].offset = OFFSET_STRUCT;
splay_tree_key n = splay_tree_lookup (mem_map, &cur_node);
if (n == NULL)
{
@@ -850,7 +866,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
else if ((kind & typemask) == GOMP_MAP_ALWAYS_POINTER)
{
tgt->list[i].key = NULL;
- tgt->list[i].offset = ~(uintptr_t) 1;
+ tgt->list[i].offset = OFFSET_POINTER;
has_firstprivate = true;
continue;
}
@@ -894,7 +910,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
if (!n)
{
tgt->list[i].key = NULL;
- tgt->list[i].offset = ~(uintptr_t) 1;
+ tgt->list[i].offset = OFFSET_POINTER;
continue;
}
}
@@ -1018,7 +1034,7 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
if (cbuf.chunk_cnt > 0)
{
cbuf.buf
- = malloc (cbuf.chunks[2 * cbuf.chunk_cnt - 1] - cbuf.chunks[0]);
+ = malloc (cbuf.chunks[cbuf.chunk_cnt-1].end - cbuf.chunks[0].start);
if (cbuf.buf)
{
cbuf.tgt = tgt;
@@ -1144,6 +1160,8 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
else
k->host_end = k->host_start + sizeof (void *);
splay_tree_key n = splay_tree_lookup (mem_map, k);
+ /* Need to account for the case where a struct field hasn't been
+ mapped onto the accelerator yet. */
if (n && n->refcount != REFCOUNT_LINK)
gomp_map_vars_existing (devicep, aq, n, k, &tgt->list[i],
kind & typemask, cbufp);
@@ -1160,12 +1178,12 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
size_t align = (size_t) 1 << (kind >> rshift);
tgt->list[i].key = k;
k->tgt = tgt;
- if (field_tgt_clear != ~(size_t) 0)
+ if (field_tgt_clear != FIELD_TGT_EMPTY)
{
k->tgt_offset = k->host_start - field_tgt_base
+ field_tgt_offset;
if (i == field_tgt_clear)
- field_tgt_clear = ~(size_t) 0;
+ field_tgt_clear = FIELD_TGT_EMPTY;
}
else
{
@@ -1419,9 +1437,10 @@ gomp_map_vars_async (struct gomp_device_descr *devicep,
long c = 0;
for (c = 0; c < cbuf.chunk_cnt; ++c)
gomp_copy_host2dev (devicep, aq,
- (void *) (tgt->tgt_start + cbuf.chunks[2 * c]),
- (char *) cbuf.buf + (cbuf.chunks[2 * c] - cbuf.chunks[0]),
- cbuf.chunks[2 * c + 1] - cbuf.chunks[2 * c], NULL);
+ (void *) (tgt->tgt_start + cbuf.chunks[c].start),
+ (char *) cbuf.buf + (cbuf.chunks[c].start
+ - cbuf.chunks[0].start),
+ cbuf.chunks[c].end - cbuf.chunks[c].start, NULL);
free (cbuf.buf);
}
next prev parent reply other threads:[~2018-11-20 21:55 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-20 21:55 [PATCH 0/6] [og8] OpenACC attach/detach Julian Brown
2018-11-20 21:55 ` [PATCH 3/6] [og8] OpenACC 2.6 manual deep copy support (attach/detach) Julian Brown
2018-11-22 15:48 ` Bernhard Reutner-Fischer
2018-11-20 21:55 ` [PATCH 2/6] [og8] Factor out duplicate code in gimplify_scan_omp_clauses Julian Brown
2018-11-20 21:55 ` [PATCH 4/6] [og8] Interaction of dynamic/multidimensional arrays with attach/detach Julian Brown
2018-11-20 21:55 ` Julian Brown [this message]
2018-11-20 21:56 ` [PATCH 5/6] [og8] Backport parts of upstream declare-allocate patch Julian Brown
2018-11-20 21:57 ` [PATCH 6/6] [og8] OpenACC refcounting refresh Julian Brown
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=b8aad8249d908a9b759bb7ce111e1b6ffaf33291.1542748807.git.julian@codesourcery.com \
--to=julian@codesourcery.com \
--cc=Catherine_Moore@mentor.com \
--cc=cltang@codesourcery.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jakub@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).