public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-13] nvptx: Add support for __builtin_nvptx_brev intrinsic.
@ 2023-05-30 11:26 Tobias Burnus
0 siblings, 0 replies; only message in thread
From: Tobias Burnus @ 2023-05-30 11:26 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:69bbeccefa16bfe9b2f47bb489cd61491d7e7c90
commit 69bbeccefa16bfe9b2f47bb489cd61491d7e7c90
Author: Tobias Burnus <tobias@codesourcery.com>
Date: Tue May 30 11:50:34 2023 +0200
nvptx: Add support for __builtin_nvptx_brev intrinsic.
This patch adds support for (a pair of) bit reversal intrinsics
__builtin_nvptx_brev and __builtin_nvptx_brevll which perform 32-bit
and 64-bit bit reversal (using nvptx's brev instruction) matching
the __brev and __brevll intrinsics provided by NVidia's nvcc compiler.
https://docs.nvidia.com/cuda/cuda-math-api/group__CUDA__MATH__INTRINSIC__INT.html
2023-05-21 Roger Sayle <roger@nextmovesoftware.com>
gcc/ChangeLog
* config/nvptx/nvptx.cc (nvptx_expand_brev): Expand target
builtin for bit reversal using brev instruction.
(enum nvptx_builtins): Add NVPTX_BUILTIN_BREV and
NVPTX_BUILTIN_BREVLL.
(nvptx_init_builtins): Define "brev" and "brevll".
(nvptx_expand_builtin): Expand NVPTX_BUILTIN_BREV and
NVPTX_BUILTIN_BREVLL via nvptx_expand_brev function.
* doc/extend.texi (Nvidia PTX Built-in Functions): New
section, document __builtin_nvptx_brev{,ll}.
gcc/testsuite/ChangeLog
* gcc.target/nvptx/brev-1.c: New 32-bit test case.
* gcc.target/nvptx/brev-2.c: Likewise.
* gcc.target/nvptx/brevll-1.c: New 64-bit test case.
* gcc.target/nvptx/brevll-2.c: Likewise.
(cherry picked from commit c09471fbc7588db2480f036aa56a2403d3c03ae5)
Diff:
---
gcc/ChangeLog.omp | 15 +++
gcc/config/nvptx/nvptx.cc | 32 +++++++
gcc/doc/extend.texi | 15 +++
gcc/testsuite/ChangeLog.omp | 10 ++
gcc/testsuite/gcc.target/nvptx/brev-1.c | 8 ++
gcc/testsuite/gcc.target/nvptx/brev-2.c | 94 ++++++++++++++++++
gcc/testsuite/gcc.target/nvptx/brevll-1.c | 8 ++
gcc/testsuite/gcc.target/nvptx/brevll-2.c | 154 ++++++++++++++++++++++++++++++
8 files changed, 336 insertions(+)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index f25b3f36800..7da4df4f961 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,18 @@
+2023-05-30 Tobias Burnus <tobias@codesourcery.com>
+
+ Backport from master:
+ 2023-05-21 Roger Sayle <roger@nextmovesoftware.com>
+
+ * config/nvptx/nvptx.cc (nvptx_expand_brev): Expand target
+ builtin for bit reversal using brev instruction.
+ (enum nvptx_builtins): Add NVPTX_BUILTIN_BREV and
+ NVPTX_BUILTIN_BREVLL.
+ (nvptx_init_builtins): Define "brev" and "brevll".
+ (nvptx_expand_builtin): Expand NVPTX_BUILTIN_BREV and
+ NVPTX_BUILTIN_BREVLL via nvptx_expand_brev function.
+ * doc/extend.texi (Nvidia PTX Built-in Functions): New
+ section, document __builtin_nvptx_brev{,ll}.
+
2023-05-30 Tobias Burnus <tobias@codesourcery.com>
Backport from master:
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 9767b123bb3..1cac62b698a 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -6324,6 +6324,29 @@ nvptx_expand_shuffle (tree exp, rtx target, machine_mode mode, int ignore)
return target;
}
+/* Expander for the bit reverse builtins. */
+
+static rtx
+nvptx_expand_brev (tree exp, rtx target, machine_mode mode, int ignore)
+{
+ if (ignore)
+ return target;
+
+ rtx arg = expand_expr (CALL_EXPR_ARG (exp, 0),
+ NULL_RTX, mode, EXPAND_NORMAL);
+ if (!REG_P (arg))
+ arg = copy_to_mode_reg (mode, arg);
+ if (!target)
+ target = gen_reg_rtx (mode);
+ rtx pat;
+ if (mode == SImode)
+ pat = gen_bitrevsi2 (target, arg);
+ else
+ pat = gen_bitrevdi2 (target, arg);
+ emit_insn (pat);
+ return target;
+}
+
const char *
nvptx_output_red_partition (rtx dst, rtx offset)
{
@@ -6456,6 +6479,8 @@ enum nvptx_builtins
NVPTX_BUILTIN_BAR_RED_AND,
NVPTX_BUILTIN_BAR_RED_OR,
NVPTX_BUILTIN_BAR_RED_POPC,
+ NVPTX_BUILTIN_BREV,
+ NVPTX_BUILTIN_BREVLL,
NVPTX_BUILTIN_COND_UNI,
NVPTX_BUILTIN_MAX
};
@@ -6586,6 +6611,9 @@ nvptx_init_builtins (void)
DEF (BAR_RED_POPC, "bar_red_popc",
(UINT, UINT, UINT, UINT, UINT, NULL_TREE));
+ DEF (BREV, "brev", (UINT, UINT, NULL_TREE));
+ DEF (BREVLL, "brevll", (LLUINT, LLUINT, NULL_TREE));
+
#undef DEF
#undef ST
#undef UINT
@@ -6633,6 +6661,10 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget),
case NVPTX_BUILTIN_BAR_RED_POPC:
return nvptx_expand_bar_red (exp, target, mode, ignore);
+ case NVPTX_BUILTIN_BREV:
+ case NVPTX_BUILTIN_BREVLL:
+ return nvptx_expand_brev (exp, target, mode, ignore);
+
case NVPTX_BUILTIN_COND_UNI:
return nvptx_expand_cond_uni (exp, target, mode, ignore);
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index ac476801ed8..871f0cf7974 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -14682,6 +14682,7 @@ instructions, but allow the compiler to schedule those calls.
* Other MIPS Built-in Functions::
* MSP430 Built-in Functions::
* NDS32 Built-in Functions::
+* Nvidia PTX Built-in Functions::
* Basic PowerPC Built-in Functions::
* PowerPC AltiVec/VSX Built-in Functions::
* PowerPC Hardware Transactional Memory Built-in Functions::
@@ -17941,6 +17942,20 @@ Enable global interrupt.
Disable global interrupt.
@enddefbuiltin
+@node Nvidia PTX Built-in Functions
+@subsection Nvidia PTX Built-in Functions
+
+These built-in functions are available for the Nvidia PTX target:
+
+@defbuiltin{unsigned int __builtin_nvptx_brev (unsigned int @var{x})}
+Reverse the bit order of a 32-bit unsigned integer, as done by the
+@code{__brev} intrinsic of Nvidia's nvcc compiler.
+@enddefbuiltin
+
+@defbuiltin{unsigned long long __builtin_nvptx_brevll (unsigned long long @var{x})}
+Reverse the bit order of a 64-bit unsigned integer.
+@enddefbuiltin
+
@node Basic PowerPC Built-in Functions
@subsection Basic PowerPC Built-in Functions
diff --git a/gcc/testsuite/ChangeLog.omp b/gcc/testsuite/ChangeLog.omp
index 0e995ef8032..69f0bcfd506 100644
--- a/gcc/testsuite/ChangeLog.omp
+++ b/gcc/testsuite/ChangeLog.omp
@@ -1,3 +1,13 @@
+2023-05-30 Tobias Burnus <tobias@codesourcery.com>
+
+ Backport from master:
+ 2023-05-21 Roger Sayle <roger@nextmovesoftware.com>
+
+ * gcc.target/nvptx/brev-1.c: New 32-bit test case.
+ * gcc.target/nvptx/brev-2.c: Likewise.
+ * gcc.target/nvptx/brevll-1.c: New 64-bit test case.
+ * gcc.target/nvptx/brevll-2.c: Likewise.
+
2023-05-30 Tobias Burnus <tobias@codesourcery.com>
Backported from master:
diff --git a/gcc/testsuite/gcc.target/nvptx/brev-1.c b/gcc/testsuite/gcc.target/nvptx/brev-1.c
new file mode 100644
index 00000000000..fbb4fff1e59
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/brev-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+unsigned int foo(unsigned int x)
+{
+ return __builtin_nvptx_brev(x);
+}
+
+/* { dg-final { scan-assembler "brev.b32" } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/brev-2.c b/gcc/testsuite/gcc.target/nvptx/brev-2.c
new file mode 100644
index 00000000000..9d0defe80bb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/brev-2.c
@@ -0,0 +1,94 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+unsigned int bitreverse32(unsigned int x)
+{
+ return __builtin_nvptx_brev(x);
+}
+
+int main(void)
+{
+ if (bitreverse32(0x00000000) != 0x00000000)
+ __builtin_abort();
+ if (bitreverse32(0xffffffff) != 0xffffffff)
+ __builtin_abort();
+
+ if (bitreverse32(0x00000001) != 0x80000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000002) != 0x40000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000004) != 0x20000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000008) != 0x10000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000010) != 0x08000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000020) != 0x04000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000040) != 0x02000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000080) != 0x01000000)
+ __builtin_abort();
+ if (bitreverse32(0x00000100) != 0x00800000)
+ __builtin_abort();
+ if (bitreverse32(0x00000200) != 0x00400000)
+ __builtin_abort();
+ if (bitreverse32(0x00000400) != 0x00200000)
+ __builtin_abort();
+ if (bitreverse32(0x00000800) != 0x00100000)
+ __builtin_abort();
+ if (bitreverse32(0x00001000) != 0x00080000)
+ __builtin_abort();
+ if (bitreverse32(0x00002000) != 0x00040000)
+ __builtin_abort();
+ if (bitreverse32(0x00004000) != 0x00020000)
+ __builtin_abort();
+ if (bitreverse32(0x00008000) != 0x00010000)
+ __builtin_abort();
+ if (bitreverse32(0x00010000) != 0x00008000)
+ __builtin_abort();
+ if (bitreverse32(0x00020000) != 0x00004000)
+ __builtin_abort();
+ if (bitreverse32(0x00040000) != 0x00002000)
+ __builtin_abort();
+ if (bitreverse32(0x00080000) != 0x00001000)
+ __builtin_abort();
+ if (bitreverse32(0x00100000) != 0x00000800)
+ __builtin_abort();
+ if (bitreverse32(0x00200000) != 0x00000400)
+ __builtin_abort();
+ if (bitreverse32(0x00400000) != 0x00000200)
+ __builtin_abort();
+ if (bitreverse32(0x00800000) != 0x00000100)
+ __builtin_abort();
+ if (bitreverse32(0x01000000) != 0x00000080)
+ __builtin_abort();
+ if (bitreverse32(0x02000000) != 0x00000040)
+ __builtin_abort();
+ if (bitreverse32(0x04000000) != 0x00000020)
+ __builtin_abort();
+ if (bitreverse32(0x08000000) != 0x00000010)
+ __builtin_abort();
+ if (bitreverse32(0x10000000) != 0x00000008)
+ __builtin_abort();
+ if (bitreverse32(0x20000000) != 0x00000004)
+ __builtin_abort();
+ if (bitreverse32(0x40000000) != 0x00000002)
+ __builtin_abort();
+ if (bitreverse32(0x80000000) != 0x00000001)
+ __builtin_abort();
+
+ if (bitreverse32(0x01234567) != 0xe6a2c480)
+ __builtin_abort();
+ if (bitreverse32(0xe6a2c480) != 0x01234567)
+ __builtin_abort();
+ if (bitreverse32(0xdeadbeef) != 0xf77db57b)
+ __builtin_abort();
+ if (bitreverse32(0xf77db57b) != 0xdeadbeef)
+ __builtin_abort();
+ if (bitreverse32(0xcafebabe) != 0x7d5d7f53)
+ __builtin_abort();
+ if (bitreverse32(0x7d5d7f53) != 0xcafebabe)
+ __builtin_abort();
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/nvptx/brevll-1.c b/gcc/testsuite/gcc.target/nvptx/brevll-1.c
new file mode 100644
index 00000000000..7009d5f5f8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/brevll-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+unsigned long foo(unsigned long x)
+{
+ return __builtin_nvptx_brevll(x);
+}
+
+/* { dg-final { scan-assembler "brev.b64" } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/brevll-2.c b/gcc/testsuite/gcc.target/nvptx/brevll-2.c
new file mode 100644
index 00000000000..56054b1e92a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/brevll-2.c
@@ -0,0 +1,154 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+unsigned long long bitreverse64(unsigned long long x)
+{
+ return __builtin_nvptx_brevll(x);
+}
+
+int main(void)
+{
+ if (bitreverse64(0x0000000000000000ll) != 0x0000000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0xffffffffffffffffll) != 0xffffffffffffffffll)
+ __builtin_abort();
+
+ if (bitreverse64(0x0000000000000001ll) != 0x8000000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000002ll) != 0x4000000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000004ll) != 0x2000000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000008ll) != 0x1000000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000010ll) != 0x0800000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000020ll) != 0x0400000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000040ll) != 0x0200000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000080ll) != 0x0100000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000100ll) != 0x0080000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000200ll) != 0x0040000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000400ll) != 0x0020000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000000800ll) != 0x0010000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000001000ll) != 0x0008000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000002000ll) != 0x0004000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000004000ll) != 0x0002000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000008000ll) != 0x0001000000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000010000ll) != 0x0000800000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000020000ll) != 0x0000400000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000040000ll) != 0x0000200000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000080000ll) != 0x0000100000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000100000ll) != 0x0000080000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000200000ll) != 0x0000040000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000400000ll) != 0x0000020000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000000800000ll) != 0x0000010000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000001000000ll) != 0x0000008000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000002000000ll) != 0x0000004000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000004000000ll) != 0x0000002000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000008000000ll) != 0x0000001000000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000010000000ll) != 0x0000000800000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000020000000ll) != 0x0000000400000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000040000000ll) != 0x0000000200000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000080000000ll) != 0x0000000100000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000100000000ll) != 0x0000000080000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000200000000ll) != 0x0000000040000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000400000000ll) != 0x0000000020000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000000800000000ll) != 0x0000000010000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000001000000000ll) != 0x0000000008000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000002000000000ll) != 0x0000000004000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000004000000000ll) != 0x0000000002000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000008000000000ll) != 0x0000000001000000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000010000000000ll) != 0x0000000000800000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000020000000000ll) != 0x0000000000400000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000040000000000ll) != 0x0000000000200000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000080000000000ll) != 0x0000000000100000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000100000000000ll) != 0x0000000000080000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000200000000000ll) != 0x0000000000040000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000400000000000ll) != 0x0000000000020000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0000800000000000ll) != 0x0000000000010000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0001000000000000ll) != 0x0000000000008000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0002000000000000ll) != 0x0000000000004000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0004000000000000ll) != 0x0000000000002000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0008000000000000ll) != 0x0000000000001000ll)
+ __builtin_abort();
+ if (bitreverse64(0x0010000000000000ll) != 0x0000000000000800ll)
+ __builtin_abort();
+ if (bitreverse64(0x0020000000000000ll) != 0x0000000000000400ll)
+ __builtin_abort();
+ if (bitreverse64(0x0040000000000000ll) != 0x0000000000000200ll)
+ __builtin_abort();
+ if (bitreverse64(0x0080000000000000ll) != 0x0000000000000100ll)
+ __builtin_abort();
+ if (bitreverse64(0x0100000000000000ll) != 0x0000000000000080ll)
+ __builtin_abort();
+ if (bitreverse64(0x0200000000000000ll) != 0x0000000000000040ll)
+ __builtin_abort();
+ if (bitreverse64(0x0400000000000000ll) != 0x0000000000000020ll)
+ __builtin_abort();
+ if (bitreverse64(0x0800000000000000ll) != 0x0000000000000010ll)
+ __builtin_abort();
+ if (bitreverse64(0x1000000000000000ll) != 0x0000000000000008ll)
+ __builtin_abort();
+ if (bitreverse64(0x2000000000000000ll) != 0x0000000000000004ll)
+ __builtin_abort();
+ if (bitreverse64(0x4000000000000000ll) != 0x0000000000000002ll)
+ __builtin_abort();
+ if (bitreverse64(0x8000000000000000ll) != 0x0000000000000001ll)
+ __builtin_abort();
+
+ if (bitreverse64(0x0123456789abcdefll) != 0xf7b3d591e6a2c480ll)
+ __builtin_abort();
+ if (bitreverse64(0xf7b3d591e6a2c480ll) != 0x0123456789abcdefll)
+ __builtin_abort();
+ if (bitreverse64(0xdeadbeefcafebabell) != 0x7d5d7f53f77db57bll)
+ __builtin_abort();
+ if (bitreverse64(0x7d5d7f53f77db57bll) != 0xdeadbeefcafebabell)
+ __builtin_abort();
+ return 0;
+}
+
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-05-30 11:26 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-30 11:26 [gcc/devel/omp/gcc-13] nvptx: Add support for __builtin_nvptx_brev intrinsic Tobias Burnus
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).