From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 80926 invoked by alias); 18 Mar 2019 17:49:14 -0000 Mailing-List: contact newlib-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: newlib-cvs-owner@sourceware.org Received: (qmail 80831 invoked by uid 9078); 18 Mar 2019 17:49:14 -0000 Date: Mon, 18 Mar 2019 17:49:00 -0000 Message-ID: <20190318174914.80827.qmail@sourceware.org> Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Corinna Vinschen To: newlib-cvs@sourceware.org Subject: [newlib-cygwin] AMD GCN: Implement circular buffering. X-Act-Checkin: newlib-cygwin X-Git-Author: Andrew Stubbs X-Git-Refname: refs/heads/master X-Git-Oldrev: 38322b9bf64bcdee8025edbaacf93a6ba9559b5e X-Git-Newrev: 62c66a39bdcb64c74cdd001146b1d7e1e50c687d X-SW-Source: 2019-q1/txt/msg00039.txt.bz2 https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=62c66a39bdcb64c74cdd001146b1d7e1e50c687d commit 62c66a39bdcb64c74cdd001146b1d7e1e50c687d Author: Andrew Stubbs Date: Mon Mar 18 16:18:09 2019 +0000 AMD GCN: Implement circular buffering. The GCN port outputs stdout and stderr via a shared-memory interface. Previously the buffer was limited to 1000 write operations, which was enough for testing purposes, but easy to exhaust. This patch implements a new circular buffering system allowing a greater amount of output. The interface must allow hundreds of hardware threads to output simultaneously. The new limit is UINT32_MAX write operations. Unfortunately, there's no way to tell if the host side has also been updated. This code will misbehave unless the gcn-run from GCC is also updated (although it's fine the other way around), but that patch has already been committed. OK? Andrew Stubbs Mentor Graphics / CodeSourcery Diff: --- newlib/libc/sys/amdgcn/write.c | 55 +++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/newlib/libc/sys/amdgcn/write.c b/newlib/libc/sys/amdgcn/write.c index ce5bd36..9c0d2a9 100644 --- a/newlib/libc/sys/amdgcn/write.c +++ b/newlib/libc/sys/amdgcn/write.c @@ -26,10 +26,14 @@ The next_output counter must be atomically incremented for each print output. Only when the print data is fully written can the - "written" flag be set. */ + "written" flag be set. + + The buffer is circular; the host increments the consumed counter + and clears the written flag as it goes, opening up slots for reuse. + The counters always use absolute numbers. */ struct output { int return_value; - int next_output; + unsigned int next_output; struct printf_data { int written; char msg[128]; @@ -39,7 +43,8 @@ struct output { double dvalue; char text[128]; }; - } queue[1000]; + } queue[1024]; + unsigned int consumed; }; _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count) @@ -55,33 +60,49 @@ _READ_WRITE_RETURN_TYPE write (int fd, const void *buf, size_t count) struct output *data = (struct output *)kernargs[2]; /* Each output slot allows 256 bytes, so reserve as many as we need. */ - int slot_count = ((count+1)/256)+1; - int index = __atomic_fetch_add (&data->next_output, slot_count, - __ATOMIC_ACQUIRE); + unsigned int slot_count = ((count+1)/256)+1; + unsigned int index = __atomic_fetch_add (&data->next_output, slot_count, + __ATOMIC_ACQUIRE); + + if ((unsigned int)(index + slot_count) < data->consumed) + { + /* Overflow. */ + errno = EFBIG; + return 0; + } + for (int c = count; - c >= 0 && index < 1000; + c >= 0; buf += 256, c -= 256, index++) { + unsigned int slot = index % 1024; + + /* Spinlock while the host catches up. */ + if (index >= 1024) + while (__atomic_load_n (&data->consumed, __ATOMIC_ACQUIRE) + <= (index - 1024)) + asm ("s_sleep 64"); + if (c < 128) { - memcpy (data->queue[index].msg, buf, c); - data->queue[index].msg[c] = '\0'; - data->queue[index].text[0] = '\0'; + memcpy (data->queue[slot].msg, buf, c); + data->queue[slot].msg[c] = '\0'; + data->queue[slot].text[0] = '\0'; } else if (c < 256) { - memcpy (data->queue[index].msg, buf, 128); - memcpy (data->queue[index].text, buf+128, c-128); - data->queue[index].text[c-128] = '\0'; + memcpy (data->queue[slot].msg, buf, 128); + memcpy (data->queue[slot].text, buf+128, c-128); + data->queue[slot].text[c-128] = '\0'; } else { - memcpy (data->queue[index].msg, buf, 128); - memcpy (data->queue[index].text, buf+128, 128); + memcpy (data->queue[slot].msg, buf, 128); + memcpy (data->queue[slot].text, buf+128, 128); } - data->queue[index].type = 3; /* Raw. */ - __atomic_store_n (&data->queue[index].written, 1, __ATOMIC_RELEASE); + data->queue[slot].type = 3; /* Raw. */ + __atomic_store_n (&data->queue[slot].written, 1, __ATOMIC_RELEASE); } return count;