public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [committed] [OG10] Add nvptx support for subword compare-and-swap
@ 2020-08-12 20:17 Kwok Cheung Yeung
  0 siblings, 0 replies; only message in thread
From: Kwok Cheung Yeung @ 2020-08-12 20:17 UTC (permalink / raw)
  To: GCC Patches, Thomas Schwinge

[-- Attachment #1: Type: text/plain, Size: 246 bytes --]

Hello

I have committed the patch previously posted at 
https://gcc.gnu.org/pipermail/gcc-patches/2020-July/550291.html, which adds 
support for atomic compare-and-swap operations on 8-bit and 16-bit types on 
nvptx, to the devel/omp/gcc-10 branch only.

Kwok

[-- Attachment #2: nvptx_og10.patch --]
[-- Type: text/plain, Size: 11094 bytes --]

commit 9dc77fbd268ea138797ecc340cf6d9ddc13795c8
Author: Kwok Cheung Yeung <kcy@codesourcery.com>
Date:   Wed Aug 12 12:37:20 2020 -0700

    nvptx: Add support for subword compare-and-swap
    
    This adds support for __sync_val_compare_and_swap and
    __sync_bool_compare_and_swap for 1-byte and 2-byte long
    values, which are not natively supported on nvptx.
    
    2020-08-12  Kwok Cheung Yeung  <kcy@codesourcery.com>
    
    	libgcc/
    	* config/nvptx/atomic.c: New.
    	* config/nvptx/t-nvptx (LIB2ADD): Add atomic.c.
    
    	gcc/testsuite/
    	* gcc.target/nvptx/sync.c: New.
    
    	libgomp/
    	* testsuite/libgomp.c-c++-common/reduction-16.c: New.

diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index e03cd1b..aba7d39 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,7 @@
+2020-08-12  Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+	* gcc.target/nvptx/sync.c: New.
+
 2020-07-28  Kwok Cheung Yeung  <kcy@codesourcery.com>
 
 	* c-c++-common/goacc/routine-4.c (seq, vector, worker, gang): Revert
diff --git a/gcc/testsuite/gcc.target/nvptx/sync.c b/gcc/testsuite/gcc.target/nvptx/sync.c
new file mode 100644
index 0000000..a573824
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/sync.c
@@ -0,0 +1,143 @@
+/* { dg-do run } */
+
+/* Test basic functionality of the intrinsics.  */
+
+/* This is a copy of gcc.dg/ia64-sync-2.c, extended to test 8-bit and 16-bit
+   values as well.  */
+
+/* Ideally this test should require sync_char_short and sync_int_long, but we
+   only support a subset at the moment.  */
+
+__extension__ typedef __SIZE_TYPE__ size_t;
+
+extern void abort (void);
+extern void *memcpy (void *, const void *, size_t);
+extern int memcmp (const void *, const void *, size_t);
+
+static char AC[4];  /* Working array; main () copies init_qi into it.  */
+static char init_qi[4] = { -30,-30,-50,-50 };  /* AC before do_qi.  */
+static char test_qi[4] = { -115,-115,25,25 };  /* AC expected after do_qi.  */
+/* Run the char compare-and-swap checks on AC; abort on any mismatch.  */
+static void
+do_qi (void)
+{
+  if (__sync_val_compare_and_swap(AC+0, -30, -115) != -30)
+    abort ();  /* AC[0] matches -30, so the old value must come back.  */
+  if (__sync_val_compare_and_swap(AC+0, -30, -115) != -115)
+    abort ();  /* AC[0] is now -115: no match, current value comes back.  */
+  if (__sync_bool_compare_and_swap(AC+1, -30, -115) != 1)
+    abort ();  /* The bool form must report the matching swap as 1.  */
+  if (__sync_bool_compare_and_swap(AC+1, -30, -115) != 0)
+    abort ();  /* ... and the non-matching repeat as 0.  */
+  /* From here the expected value is read from AC, so each swap matches.  */
+  if (__sync_val_compare_and_swap(AC+2, AC[2], 25) != -50)
+    abort ();  /* The initial -50 must be returned while 25 is stored.  */
+  if (__sync_val_compare_and_swap(AC+2, AC[2], 25) != 25)
+    abort ();  /* AC[2] is already 25: 25 is swapped for 25.  */
+  if (__sync_bool_compare_and_swap(AC+3, AC[3], 25) != 1)
+    abort ();  /* AC[3] goes from -50 to 25.  */
+  if (__sync_bool_compare_and_swap(AC+3, AC[3], 25) != 1)
+    abort ();  /* Still a match with AC[3] == 25, so 1 again.  */
+}
+
+static short AS[4];  /* Working array; main () copies init_hi into it.  */
+static short init_hi[4] = { -30,-30,-50,-50 };  /* AS before do_hi.  */
+static short test_hi[4] = { -115,-115,25,25 };  /* AS expected after do_hi.  */
+/* Run the short compare-and-swap checks on AS; abort on any mismatch.  */
+static void
+do_hi (void)
+{
+  if (__sync_val_compare_and_swap(AS+0, -30, -115) != -30)
+    abort ();  /* AS[0] matches -30, so the old value must come back.  */
+  if (__sync_val_compare_and_swap(AS+0, -30, -115) != -115)
+    abort ();  /* AS[0] is now -115: no match, current value comes back.  */
+  if (__sync_bool_compare_and_swap(AS+1, -30, -115) != 1)
+    abort ();  /* The bool form must report the matching swap as 1.  */
+  if (__sync_bool_compare_and_swap(AS+1, -30, -115) != 0)
+    abort ();  /* ... and the non-matching repeat as 0.  */
+  /* From here the expected value is read from AS, so each swap matches.  */
+  if (__sync_val_compare_and_swap(AS+2, AS[2], 25) != -50)
+    abort ();  /* The initial -50 must be returned while 25 is stored.  */
+  if (__sync_val_compare_and_swap(AS+2, AS[2], 25) != 25)
+    abort ();  /* AS[2] is already 25: 25 is swapped for 25.  */
+  if (__sync_bool_compare_and_swap(AS+3, AS[3], 25) != 1)
+    abort ();  /* AS[3] goes from -50 to 25.  */
+  if (__sync_bool_compare_and_swap(AS+3, AS[3], 25) != 1)
+    abort ();  /* Still a match with AS[3] == 25, so 1 again.  */
+}
+
+static int AI[4];  /* Working array; main () copies init_si into it.  */
+static int init_si[4] = { -30,-30,-50,-50 };  /* AI before do_si.  */
+static int test_si[4] = { -115,-115,25,25 };  /* AI expected after do_si.  */
+/* Run the int compare-and-swap checks on AI; abort on any mismatch.  */
+static void
+do_si (void)
+{
+  if (__sync_val_compare_and_swap(AI+0, -30, -115) != -30)
+    abort ();  /* AI[0] matches -30, so the old value must come back.  */
+  if (__sync_val_compare_and_swap(AI+0, -30, -115) != -115)
+    abort ();  /* AI[0] is now -115: no match, current value comes back.  */
+  if (__sync_bool_compare_and_swap(AI+1, -30, -115) != 1)
+    abort ();  /* The bool form must report the matching swap as 1.  */
+  if (__sync_bool_compare_and_swap(AI+1, -30, -115) != 0)
+    abort ();  /* ... and the non-matching repeat as 0.  */
+  /* From here the expected value is read from AI, so each swap matches.  */
+  if (__sync_val_compare_and_swap(AI+2, AI[2], 25) != -50)
+    abort ();  /* The initial -50 must be returned while 25 is stored.  */
+  if (__sync_val_compare_and_swap(AI+2, AI[2], 25) != 25)
+    abort ();  /* AI[2] is already 25: 25 is swapped for 25.  */
+  if (__sync_bool_compare_and_swap(AI+3, AI[3], 25) != 1)
+    abort ();  /* AI[3] goes from -50 to 25.  */
+  if (__sync_bool_compare_and_swap(AI+3, AI[3], 25) != 1)
+    abort ();  /* Still a match with AI[3] == 25, so 1 again.  */
+}
+
+static long AL[4];  /* Working array; main () copies init_di into it.  */
+static long init_di[4] = { -30,-30,-50,-50 };  /* AL before do_di.  */
+static long test_di[4] = { -115,-115,25,25 };  /* AL expected after do_di.  */
+/* Run the long compare-and-swap checks on AL; abort on any mismatch.  */
+static void
+do_di (void)
+{
+  if (__sync_val_compare_and_swap(AL+0, -30, -115) != -30)
+    abort ();  /* AL[0] matches -30, so the old value must come back.  */
+  if (__sync_val_compare_and_swap(AL+0, -30, -115) != -115)
+    abort ();  /* AL[0] is now -115: no match, current value comes back.  */
+  if (__sync_bool_compare_and_swap(AL+1, -30, -115) != 1)
+    abort ();  /* The bool form must report the matching swap as 1.  */
+  if (__sync_bool_compare_and_swap(AL+1, -30, -115) != 0)
+    abort ();  /* ... and the non-matching repeat as 0.  */
+  /* From here the expected value is read from AL, so each swap matches.  */
+  if (__sync_val_compare_and_swap(AL+2, AL[2], 25) != -50)
+    abort ();  /* The initial -50 must be returned while 25 is stored.  */
+  if (__sync_val_compare_and_swap(AL+2, AL[2], 25) != 25)
+    abort ();  /* AL[2] is already 25: 25 is swapped for 25.  */
+  if (__sync_bool_compare_and_swap(AL+3, AL[3], 25) != 1)
+    abort ();  /* AL[3] goes from -50 to 25.  */
+  if (__sync_bool_compare_and_swap(AL+3, AL[3], 25) != 1)
+    abort ();  /* Still a match with AL[3] == 25, so 1 again.  */
+}
+
+int main()  /* Load initial values, run each type's checks, verify results.  */
+{
+  memcpy(AC, init_qi, sizeof(init_qi));
+  memcpy(AS, init_hi, sizeof(init_hi));
+  memcpy(AI, init_si, sizeof(init_si));
+  memcpy(AL, init_di, sizeof(init_di));
+  /* Each do_* aborts if a compare-and-swap returns an unexpected value.  */
+  do_qi ();
+  do_hi ();
+  do_si ();
+  do_di ();
+  /* The arrays must now hold exactly the values the swaps stored.  */
+  if (memcmp (AC, test_qi, sizeof(test_qi)))
+    abort ();
+  if (memcmp (AS, test_hi, sizeof(test_hi)))
+    abort ();
+  if (memcmp (AI, test_si, sizeof(test_si)))
+    abort ();
+  if (memcmp (AL, test_di, sizeof(test_di)))
+    abort ();
+
+  return 0;
+}
diff --git a/libgcc/ChangeLog.omp b/libgcc/ChangeLog.omp
new file mode 100644
index 0000000..2009c3e
--- /dev/null
+++ b/libgcc/ChangeLog.omp
@@ -0,0 +1,4 @@
+2020-08-12  Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+	* config/nvptx/atomic.c: New.
+	* config/nvptx/t-nvptx (LIB2ADD): Add atomic.c.
diff --git a/libgcc/config/nvptx/atomic.c b/libgcc/config/nvptx/atomic.c
new file mode 100644
index 0000000..25a34fb
--- /dev/null
+++ b/libgcc/config/nvptx/atomic.c
@@ -0,0 +1,70 @@
+/* NVPTX atomic operations
+   Copyright (C) 2020 Free Software Foundation, Inc.
+   Contributed by Mentor Graphics.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdbool.h>
+
+/* Implement __sync_val_compare_and_swap and __sync_bool_compare_and_swap
+   for 1 and 2-byte values (which are not natively supported) in terms of
+   __sync_val_compare_and_swap for 4-byte values (which is supported).
+   PTR is rounded down to a 4-byte boundary and its low two bits select
+   the shift of the subword, so the rest of that word must be accessible
+   too.  NOTE(review): assumes the subword does not straddle two words.  */
+
+#define __SYNC_SUBWORD_COMPARE_AND_SWAP(TYPE, SIZE)			     \
+/* Value form: returns the value found at PTR before the operation.  */      \
+TYPE									     \
+__sync_val_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval)     \
+{									     \
+  unsigned int *wordptr = (unsigned int *)((__UINTPTR_TYPE__ ) ptr & ~3UL);  \
+  int shift = ((__UINTPTR_TYPE__ ) ptr & 3UL) * 8;			     \
+  unsigned int valmask = (1 << (SIZE * 8)) - 1;				     \
+  unsigned int wordmask = ~(valmask << shift);				     \
+  unsigned int oldword = *wordptr;					     \
+  for (;;)								     \
+    {									     \
+      TYPE prevval = (oldword >> shift) & valmask;			     \
+      /* Exit if the subword value previously read from memory is not */     \
+      /* equal to the expected value OLDVAL.  */			     \
+      if (__builtin_expect (prevval != oldval, 0))			     \
+	return prevval;							     \
+      unsigned int newword = oldword & wordmask;			     \
+      newword |= ((unsigned int) newval) << shift;			     \
+      unsigned int prevword						     \
+	  = __sync_val_compare_and_swap_4 (wordptr, oldword, newword);	     \
+      /* Exit only if the compare-and-swap succeeds on the whole word */     \
+      /* (i.e. the contents of *WORDPTR have not changed since the last */   \
+      /* memory read).  */						     \
+      if (__builtin_expect (prevword == oldword, 1))			     \
+	return oldval;							     \
+      oldword = prevword;						     \
+    }									     \
+}									     \
+/* Bool form: true iff the value form returned OLDVAL (swap done).  */	     \
+bool									     \
+__sync_bool_compare_and_swap_##SIZE (TYPE *ptr, TYPE oldval, TYPE newval)    \
+{									     \
+  return __sync_val_compare_and_swap_##SIZE (ptr, oldval, newval) == oldval; \
+}
+
+__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned char, 1)
+__SYNC_SUBWORD_COMPARE_AND_SWAP (unsigned short, 2)
diff --git a/libgcc/config/nvptx/t-nvptx b/libgcc/config/nvptx/t-nvptx
index c4d20c9..ede0bf0 100644
--- a/libgcc/config/nvptx/t-nvptx
+++ b/libgcc/config/nvptx/t-nvptx
@@ -1,5 +1,6 @@
 LIB2ADD=$(srcdir)/config/nvptx/reduction.c \
-	$(srcdir)/config/nvptx/mgomp.c
+	$(srcdir)/config/nvptx/mgomp.c \
+	$(srcdir)/config/nvptx/atomic.c
 
 LIB2ADDEH=
 LIB2FUNCS_EXCLUDE=__main
diff --git a/libgomp/ChangeLog.omp b/libgomp/ChangeLog.omp
index e741121..383c1f7 100644
--- a/libgomp/ChangeLog.omp
+++ b/libgomp/ChangeLog.omp
@@ -1,3 +1,7 @@
+2020-08-12  Kwok Cheung Yeung  <kcy@codesourcery.com>
+
+	* testsuite/libgomp.c-c++-common/reduction-16.c: New.
+
 2020-07-30  Julian Brown  <julian@codesourcery.com>
 
 	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c: Fix for GCN
diff --git a/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c
new file mode 100644
index 0000000..d0e82b0
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c-c++-common/reduction-16.c
@@ -0,0 +1,53 @@
+/* { dg-do run } */
+
+#include <stdlib.h>
+
+#define N 512
+/* Define test_T: || then && reductions on type T; returns 0 on success.  */
+#define GENERATE_TEST(T)	\
+int test_##T (void)		\
+{				\
+  T a[N], res = 0;		\
+  /* Fill a[] with alternating 0 and 1.  */ \
+  for (int i = 0; i < N; ++i)	\
+    a[i] = i & 1;		\
+  /* a[] contains ones, so OR-ing it into res (0) must give non-zero.  */ \
+_Pragma("omp target teams distribute reduction(||:res) defaultmap(tofrom:scalar)") \
+  for (int i = 0; i < N; ++i)	\
+    res = res || a[i];		\
+				\
+  /* res should be non-zero.  */\
+  if (!res)			\
+    return 1;			\
+  /* res is non-zero here; a[] contains zeros, so AND must give zero.  */ \
+_Pragma("omp target teams distribute reduction(&&:res) defaultmap(tofrom:scalar)") \
+  for (int i = 0; i < N; ++i)	\
+    res = res && a[i];		\
+				\
+  /* res should be zero.  */	\
+  return res;			\
+}
+
+GENERATE_TEST(char)  /* Defines test_char.  */
+GENERATE_TEST(short)  /* Defines test_short.  */
+GENERATE_TEST(int)  /* Defines test_int.  */
+GENERATE_TEST(long)  /* Defines test_long.  */
+#ifdef __SIZEOF_INT128__
+GENERATE_TEST(__int128)  /* Defines test___int128.  */
+#endif /* __SIZEOF_INT128__ */
+/* Run every generated test; a non-zero result aborts.  */
+int main(void)
+{
+  if (test_char ())
+    abort ();
+  if (test_short ())
+    abort ();
+  if (test_int ())
+    abort ();
+  if (test_long ())
+    abort ();
+#ifdef __SIZEOF_INT128__
+  if (test___int128 ())
+    abort ();
+#endif /* __SIZEOF_INT128__ */
+}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-08-12 20:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-12 20:17 [committed] [OG10] Add nvptx support for subword compare-and-swap Kwok Cheung Yeung

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).