From: Tobias Burnus <tobias@codesourcery.com>
To: Andrew Stubbs <ams@codesourcery.com>,
gcc-patches <gcc-patches@gcc.gnu.org>
Subject: [Patch] gcn: Add __builtin_gcn_{get_stack_limit,first_call_this_thread_p}
Date: Fri, 18 Nov 2022 18:20:29 +0100 [thread overview]
Message-ID: <1bec26d6-e2c5-3408-4f61-0fb17e730b3e@codesourcery.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 982 bytes --]
This patch adds two builtins (getting end-of-stack pointer and
a Boolean answer whether it was the first call to the builtin on this thread).
The idea is to replace some hard-coded values in newlib, which permits moving
later to a manually allocated stack on the compiler side without the need to
modify newlib again. The GCC patch matches what newlib did in reent; I could
imagine that we change this later on.
Lightly tested (especially by visual inspection).
Currently doing a final regtest, OK when it passes?
Any comments on this patch - or on the attached newlib patch?*
Tobias
(*) I also included a patch to newlib to show where we are heading
+ to actually use them for regtesting ...
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955
[-- Attachment #2: gcn-stack-init.diff --]
[-- Type: text/x-patch, Size: 5430 bytes --]
gcn: Add __builtin_gcn_{get_stack_limit,first_call_this_thread_p}
The new builtins have been added for newlib to reduce dependency on
compiler-internal implementation choices of GCC in newlib's getreent.c.
gcc/ChangeLog:
* config/gcn/gcn-builtins.def (FIRST_CALL_THIS_THREAD_P,
GET_STACK_LIMIT): Add new builtins.
* config/gcn/gcn.cc (gcn_expand_builtin_1): Expand them.
* config/gcn/gcn.md (prologue_use): Add "register_operand" as
arg to match_operand.
(prologue_use_di): New; DI insn_and_split variant of the former.
Co-Authored-By: Andrew Stubbs <ams@codesourcery.com>
gcc/config/gcn/gcn-builtins.def | 4 +++
gcc/config/gcn/gcn.cc | 70 ++++++++++++++++++++++++++++++++++++++++-
gcc/config/gcn/gcn.md | 15 ++++++++-
3 files changed, 87 insertions(+), 2 deletions(-)
diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def
index eeeaebf9013..f1cf30bbc94 100644
--- a/gcc/config/gcn/gcn-builtins.def
+++ b/gcc/config/gcn/gcn-builtins.def
@@ -160,8 +160,12 @@ DEF_BUILTIN (ACC_BARRIER, -1, "acc_barrier", B_INSN, _A1 (GCN_BTI_VOID),
/* Kernel inputs. */
+DEF_BUILTIN (FIRST_CALL_THIS_THREAD_P, -1, "first_call_this_thread_p", B_INSN,
+ _A1 (GCN_BTI_BOOL), gcn_expand_builtin_1)
DEF_BUILTIN (KERNARG_PTR, -1, "kernarg_ptr", B_INSN, _A1 (GCN_BTI_VOIDPTR),
gcn_expand_builtin_1)
+DEF_BUILTIN (GET_STACK_LIMIT, -1, "get_stack_limit", B_INSN,
+ _A1 (GCN_BTI_VOIDPTR), gcn_expand_builtin_1)
#undef _A1
#undef _A2
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index b3814c2e7c6..051eadee783 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4493,6 +4493,44 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
emit_insn (gen_gcn_wavefront_barrier ());
return target;
+ case GCN_BUILTIN_GET_STACK_LIMIT:
+ {
+ /* stackbase = (stack_segment_decr & 0x0000ffffffffffff)
+ + stack_wave_offset);
+ seg_size = dispatch_ptr->private_segment_size;
+ stacklimit = stackbase + seg_size*64;
+ with segsize = dispatch_ptr + 6*sizeof(int16_t) + 3*sizeof(int32_t);
+ cf. struct hsa_kernel_dispatch_packet_s in the HSA doc. */
+ rtx ptr;
+ if (cfun->machine->args.reg[DISPATCH_PTR_ARG] >= 0
+ && cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
+ {
+ rtx size_rtx = gen_rtx_REG (DImode,
+ cfun->machine->args.reg[DISPATCH_PTR_ARG]);
+ size_rtx = gen_rtx_MEM (DImode,
+ gen_rtx_PLUS (DImode, size_rtx,
+ GEN_INT (6*16 + 3*32)));
+ size_rtx = gen_rtx_MULT (DImode, size_rtx, GEN_INT (64));
+
+ ptr = gen_rtx_REG (DImode,
+ cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]);
+ ptr = gen_rtx_AND (DImode, ptr, GEN_INT (0x0000ffffffffffff));
+ ptr = gen_rtx_PLUS (DImode, ptr, size_rtx);
+ if (cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG] >= 0)
+ {
+ rtx off;
+ off = gen_rtx_REG (SImode,
+ cfun->machine->args.reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);
+ ptr = gen_rtx_PLUS (DImode, ptr, off);
+ }
+ }
+ else
+ {
+ ptr = gen_reg_rtx (DImode);
+ emit_move_insn (ptr, const0_rtx);
+ }
+ return ptr;
+ }
case GCN_BUILTIN_KERNARG_PTR:
{
rtx ptr;
@@ -4506,7 +4544,37 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ ,
}
return ptr;
}
-
+ case GCN_BUILTIN_FIRST_CALL_THIS_THREAD_P:
+ {
+ /* Stash a marker in the unused upper 16 bits of s[0:1] to indicate
+ whether it was the first call. */
+ rtx result = gen_reg_rtx (BImode);
+ emit_move_insn (result, const0_rtx);
+ if (cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG] >= 0)
+ {
+ rtx not_first = gen_label_rtx ();
+ rtx reg = gen_rtx_REG (DImode,
+ cfun->machine->args.reg[PRIVATE_SEGMENT_BUFFER_ARG]);
+ rtx cmp = force_reg (DImode,
+ gen_rtx_AND (DImode, reg,
+ GEN_INT (0xffff000000000000L)));
+ emit_insn (gen_cstoresi4 (result, gen_rtx_EQ (BImode, cmp,
+ GEN_INT(12345L << 48)),
+ cmp, GEN_INT(12345L << 48)));
+ emit_jump_insn (gen_cjump (not_first, gen_rtx_EQ (BImode, result,
+ const0_rtx),
+ result));
+ emit_move_insn (reg,
+ force_reg (DImode,
+ gen_rtx_IOR (DImode,
+ gen_rtx_AND (DImode, reg,
+ GEN_INT (0x0000ffffffffffffL)),
+ GEN_INT (12345L << 48))));
+ emit_insn (gen_prologue_use (reg));
+ emit_label (not_first);
+ }
+ return result;
+ }
default:
gcc_unreachable ();
}
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index 987b76396cc..a8b9c28d115 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -692,11 +692,24 @@
;; {{{ Prologue/Epilogue
(define_insn "prologue_use"
- [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
+ [(unspec_volatile [(match_operand 0 "register_operand")] UNSPECV_PROLOGUE_USE)]
""
""
[(set_attr "length" "0")])
+(define_insn_and_split "prologue_use_di"
+ [(unspec_volatile [(match_operand:DI 0 "register_operand")] UNSPECV_PROLOGUE_USE)]
+ ""
+ "#"
+ "reload_completed"
+ [(unspec_volatile [(match_dup 0)] UNSPECV_PROLOGUE_USE)
+ (unspec_volatile [(match_dup 1)] UNSPECV_PROLOGUE_USE)]
+ {
+ operands[1] = gcn_operand_part (DImode, operands[0], 1);
+ operands[0] = gcn_operand_part (DImode, operands[0], 0);
+ }
+ [(set_attr "length" "0")])
+
(define_expand "prologue"
[(const_int 0)]
""
[-- Attachment #3: newlib-reent.diff --]
[-- Type: text/x-patch, Size: 2594 bytes --]
amdgcn: Use __builtin_gcn_ in libc/machine/amdgcn/getreent.c
Call __builtin_gcn_get_stack_limit and __builtin_gcn_first_call_this_thread_p
to reduce dependency on some register/layout assumptions by using the new
GCC mainline (GCC 13) builtins, if they are available. If not, the existing
code is used.
newlib/libc/machine/amdgcn/getreent.c | 38 ++++++++++++++++++++++++++---------
1 file changed, 29 insertions(+), 9 deletions(-)
diff --git a/newlib/libc/machine/amdgcn/getreent.c b/newlib/libc/machine/amdgcn/getreent.c
index be7d2edc9..ef731f649 100644
--- a/newlib/libc/machine/amdgcn/getreent.c
+++ b/newlib/libc/machine/amdgcn/getreent.c
@@ -29,22 +29,42 @@ typedef struct hsa_kernel_dispatch_packet_s {
struct _reent *
__getreent (void)
{
- /* Place the reent data at the top of the stack allocation.
- s[0:1] contains a 48-bit private segment base address.
+ /* Place the reent data at the top of the stack allocation. */
+ struct data {
+ int marker;
+ struct _reent reent;
+ } *data;
+
+#if defined(__has_builtin) \
+ && __has_builtin(__builtin_gcn_get_stack_limit) \
+ && __has_builtin(__builtin_gcn_first_call_this_thread_p)
+ unsigned long addr = (((unsigned long) __builtin_gcn_get_stack_limit()
+ - sizeof(struct data)) & ~7);
+ data = (struct data *)addr;
+
+ register long sp asm("s16");
+
+ if (sp >= addr)
+ goto stackoverflow;
+ if (__builtin_gcn_first_call_this_thread_p())
+ {
+ data->marker = 12345;
+ __builtin_memset (&data->reent, 0, sizeof(struct _reent));
+ _REENT_INIT_PTR_ZEROED (&data->reent);
+ }
+ else if (data->marker != 12345)
+ goto stackoverflow;
+#else
+ /* s[0:1] contains a 48-bit private segment base address.
s11 contains the offset to the base of the stack.
s[4:5] contains the dispatch pointer.
-
+
WARNING: this code will break if s[0:1] is ever used for anything! */
const register unsigned long buffer_descriptor asm("s0");
unsigned long private_segment = buffer_descriptor & 0x0000ffffffffffff;
const register unsigned int stack_offset asm("s11");
const register hsa_kernel_dispatch_packet_t *dispatch_ptr asm("s4");
- struct data {
- int marker;
- struct _reent reent;
- } *data;
-
unsigned long stack_base = private_segment + stack_offset;
unsigned long stack_end = stack_base + dispatch_ptr->private_segment_size * 64;
unsigned long addr = (stack_end - sizeof(struct data)) & ~7;
@@ -69,7 +89,7 @@ __getreent (void)
}
else if (data->marker != 12345)
goto stackoverflow;
-
+#endif
return &data->reent;
next reply other threads:[~2022-11-18 17:20 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-18 17:20 Tobias Burnus [this message]
2022-11-18 17:49 ` Andrew Stubbs
2022-11-19 10:46 ` Tobias Burnus
2022-11-20 0:23 ` Andrew Stubbs
2022-11-21 13:41 ` Tobias Burnus
2022-11-21 14:58 ` Stubbs, Andrew
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1bec26d6-e2c5-3408-4f61-0fb17e730b3e@codesourcery.com \
--to=tobias@codesourcery.com \
--cc=ams@codesourcery.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).