public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-4821] core: Support heap-based trampolines
@ 2023-10-22 13:01 Iain D Sandoe
  0 siblings, 0 replies; only message in thread
From: Iain D Sandoe @ 2023-10-22 13:01 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:28d8c680aaea46137170fef2bd1c6a98301518dc

commit r14-4821-g28d8c680aaea46137170fef2bd1c6a98301518dc
Author: Andrew Burgess <andrew.burgess@embecosm.com>
Date:   Sat Aug 5 14:54:11 2023 +0200

    core: Support heap-based trampolines
    
    Generate heap-based nested function trampolines
    
    Add support for allocating nested function trampolines on an
    executable heap rather than on the stack. This is motivated by targets
    such as AArch64 Darwin, which globally prohibit executing code on the
    stack.
    
    The target-specific routines for allocating and writing trampolines are
    to be provided in libgcc.
    
    The gcc flag -ftrampoline-impl controls whether to generate code
    that instantiates trampolines on the stack, or to emit calls to
    __builtin_nested_func_ptr_created and
    __builtin_nested_func_ptr_deleted. Note that this flag is completely
    independent of libgcc: If libgcc is for any reason missing those
    symbols, you will get a link failure.
    
    This implementation imposes some implicit restrictions as compared to
    stack trampolines. longjmp'ing back to a state before a trampoline was
    created will cause us to skip over the corresponding
    __builtin_nested_func_ptr_deleted, which will leak trampolines
    starting from the beginning of the linked list of allocated
    trampolines. There may be scope for instrumenting longjmp/setjmp to
    trigger cleanups of trampolines.
    
    Co-Authored-By: Maxim Blinov <maxim.blinov@embecosm.com>
    Co-Authored-By: Iain Sandoe <iain@sandoe.co.uk>
    Co-Authored-By: Francois-Xavier Coudert <fxcoudert@gcc.gnu.org>
    
    gcc/ChangeLog:
    
            * builtins.def (BUILT_IN_NESTED_PTR_CREATED): Define.
            (BUILT_IN_NESTED_PTR_DELETED): Ditto.
            * common.opt (ftrampoline-impl): Add option to control
            generation of trampoline instantiation (heap or stack).
            * coretypes.h: Define enum trampoline_impl.
            * tree-nested.cc (convert_tramp_reference_op): Don't bother calling
            __builtin_adjust_trampoline for heap trampolines.
            (finalize_nesting_tree_1): Emit calls to
            __builtin_nested_...{created,deleted} if we're generating with
            -ftrampoline-impl=heap.
            * tree.cc (build_common_builtin_nodes): Build
            __builtin_nested_...{created,deleted}.
            * doc/invoke.texi (-ftrampoline-impl): Document.

Diff:
---
 gcc/builtins.def    |   2 +
 gcc/common.opt      |  17 +++++++-
 gcc/coretypes.h     |   6 +++
 gcc/doc/invoke.texi |  17 +++++++-
 gcc/tree-nested.cc  | 121 ++++++++++++++++++++++++++++++++++++++++++++--------
 gcc/tree.cc         |  17 ++++++++
 6 files changed, 161 insertions(+), 19 deletions(-)

diff --git a/gcc/builtins.def b/gcc/builtins.def
index eb6f4ec2034c..e989cd814a5d 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -1074,6 +1074,8 @@ DEF_BUILTIN_STUB (BUILT_IN_ADJUST_TRAMPOLINE, "__builtin_adjust_trampoline")
 DEF_BUILTIN_STUB (BUILT_IN_INIT_DESCRIPTOR, "__builtin_init_descriptor")
 DEF_BUILTIN_STUB (BUILT_IN_ADJUST_DESCRIPTOR, "__builtin_adjust_descriptor")
 DEF_BUILTIN_STUB (BUILT_IN_NONLOCAL_GOTO, "__builtin_nonlocal_goto")
+DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_CREATED, "__builtin_nested_func_ptr_created")
+DEF_BUILTIN_STUB (BUILT_IN_NESTED_PTR_DELETED, "__builtin_nested_func_ptr_deleted")
 
 /* Implementing __builtin_setjmp.  */
 DEF_BUILTIN_STUB (BUILT_IN_SETJMP_SETUP, "__builtin_setjmp_setup")
diff --git a/gcc/common.opt b/gcc/common.opt
index ce34075561f9..1cf3bdd3b518 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2927,10 +2927,25 @@ Common Var(flag_tracer) Optimization
 Perform superblock formation via tail duplication.
 
 ftrampolines
-Common Var(flag_trampolines) Init(0)
+Common Var(flag_trampolines) Init(HEAP_TRAMPOLINES_INIT)
 For targets that normally need trampolines for nested functions, always
 generate them instead of using descriptors.
 
+ftrampoline-impl=
+Common Joined RejectNegative Enum(trampoline_impl) Var(flag_trampoline_impl) Init(HEAP_TRAMPOLINES_INIT ? TRAMPOLINE_IMPL_HEAP : TRAMPOLINE_IMPL_STACK)
+Whether trampolines are generated in executable memory rather than
+executable stack.
+
+Enum
+Name(trampoline_impl) Type(enum trampoline_impl) UnknownError(unknown trampoline implementation %qs)
+
+EnumValue
+Enum(trampoline_impl) String(stack) Value(TRAMPOLINE_IMPL_STACK)
+
+EnumValue
+Enum(trampoline_impl) String(heap) Value(TRAMPOLINE_IMPL_HEAP)
+
+
 ; Zero means that floating-point math operations cannot generate a
 ; (user-visible) trap.  This is the case, for example, in nonstop
 ; IEEE 754 arithmetic.
diff --git a/gcc/coretypes.h b/gcc/coretypes.h
index f86dc169a40b..db7813bdd3d9 100644
--- a/gcc/coretypes.h
+++ b/gcc/coretypes.h
@@ -204,6 +204,12 @@ enum tls_model {
   TLS_MODEL_LOCAL_EXEC
 };
 
+/* Types of trampoline implementation.  */
+enum trampoline_impl {
+  TRAMPOLINE_IMPL_STACK,
+  TRAMPOLINE_IMPL_HEAP
+};
+
 /* Types of ABI for an offload compiler.  */
 enum offload_abi {
   OFFLOAD_ABI_UNSET,
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index aebe9195ef0f..17aaa8cc058a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -718,7 +718,8 @@ Objective-C and Objective-C++ Dialects}.
 -fverbose-asm  -fpack-struct[=@var{n}]
 -fleading-underscore  -ftls-model=@var{model}
 -fstack-reuse=@var{reuse_level}
--ftrampolines  -ftrapv  -fwrapv
+-ftrampolines -ftrampoline-impl=@r{[}stack@r{|}heap@r{]}
+-ftrapv  -fwrapv
 -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
 -fstrict-volatile-bitfields  -fsync-libcalls}
 
@@ -19050,6 +19051,20 @@ For languages other than Ada, the @code{-ftrampolines} and
 trampolines are always generated on platforms that need them
 for nested functions.
 
+@opindex ftrampoline-impl
+@item -ftrampoline-impl=@r{[}stack@r{|}heap@r{]}
+By default, trampolines are generated on stack.  However, certain platforms
+(such as the Apple M1) do not permit an executable stack.  Compiling with
+@option{-ftrampoline-impl=heap} generate calls to
+@code{__builtin_nested_func_ptr_created} and
+@code{__builtin_nested_func_ptr_deleted} in order to allocate and
+deallocate trampoline space on the executable heap.  These functions are
+implemented in libgcc, and will only be provided on specific targets:
+x86_64 Darwin, x86_64 and aarch64 Linux.  @emph{PLEASE NOTE}: Heap
+trampolines are @emph{not} guaranteed to be correctly deallocated if you
+@code{setjmp}, instantiate nested functions, and then @code{longjmp} back
+to a state prior to having allocated those nested functions.
+
 @opindex fvisibility
 @item -fvisibility=@r{[}default@r{|}internal@r{|}hidden@r{|}protected@r{]}
 Set the default ELF image symbol visibility to the specified option---all
diff --git a/gcc/tree-nested.cc b/gcc/tree-nested.cc
index 31c7b6001bd4..d2fe3fca8af9 100644
--- a/gcc/tree-nested.cc
+++ b/gcc/tree-nested.cc
@@ -611,6 +611,14 @@ get_trampoline_type (struct nesting_info *info)
   if (trampoline_type)
     return trampoline_type;
 
+  /* When trampolines are created off-stack then the only thing we need in the
+     local frame is a single pointer.  */
+  if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+    {
+      trampoline_type = build_pointer_type (void_type_node);
+      return trampoline_type;
+    }
+
   align = TRAMPOLINE_ALIGNMENT;
   size = TRAMPOLINE_SIZE;
 
@@ -2790,17 +2798,27 @@ convert_tramp_reference_op (tree *tp, int *walk_subtrees, void *data)
 
       /* Compute the address of the field holding the trampoline.  */
       x = get_frame_field (info, target_context, x, &wi->gsi);
-      x = build_addr (x);
-      x = gsi_gimplify_val (info, x, &wi->gsi);
 
-      /* Do machine-specific ugliness.  Normally this will involve
-	 computing extra alignment, but it can really be anything.  */
-      if (descr)
-	builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+      /* APB: We don't need to do the adjustment calls when using off-stack
+	 trampolines, any such adjustment will be done when the off-stack
+	 trampoline is created.  */
+      if (!descr && flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+	x = gsi_gimplify_val (info, x, &wi->gsi);
       else
-	builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
-      call = gimple_build_call (builtin, 1, x);
-      x = init_tmp_var_with_call (info, &wi->gsi, call);
+	{
+	  x = build_addr (x);
+
+	  x = gsi_gimplify_val (info, x, &wi->gsi);
+
+	  /* Do machine-specific ugliness.  Normally this will involve
+	     computing extra alignment, but it can really be anything.  */
+	  if (descr)
+	    builtin = builtin_decl_implicit (BUILT_IN_ADJUST_DESCRIPTOR);
+	  else
+	    builtin = builtin_decl_implicit (BUILT_IN_ADJUST_TRAMPOLINE);
+	  call = gimple_build_call (builtin, 1, x);
+	  x = init_tmp_var_with_call (info, &wi->gsi, call);
+	}
 
       /* Cast back to the proper function type.  */
       x = build1 (NOP_EXPR, TREE_TYPE (t), x);
@@ -3380,6 +3398,7 @@ build_init_call_stmt (struct nesting_info *info, tree decl, tree field,
 static void
 finalize_nesting_tree_1 (struct nesting_info *root)
 {
+  gimple_seq cleanup_list = NULL;
   gimple_seq stmt_list = NULL;
   gimple *stmt;
   tree context = root->context;
@@ -3511,9 +3530,48 @@ finalize_nesting_tree_1 (struct nesting_info *root)
 	  if (!field)
 	    continue;
 
-	  x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
-	  stmt = build_init_call_stmt (root, i->context, field, x);
-	  gimple_seq_add_stmt (&stmt_list, stmt);
+	  if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+	    {
+	      /* We pass a whole bunch of arguments to the builtin function that
+		 creates the off-stack trampoline, these are
+		 1. The nested function chain value (that must be passed to the
+		 nested function so it can find the function arguments).
+		 2. A pointer to the nested function implementation,
+		 3. The address in the local stack frame where we should write
+		 the address of the trampoline.
+
+		 When this code was originally written I just kind of threw
+		 everything at the builtin, figuring I'd work out what was
+		 actually needed later, I think, the stack pointer could
+		 certainly be dropped, arguments #2 and #4 are based off the
+		 stack pointer anyway, so #1 doesn't seem to add much value.  */
+	      tree arg1, arg2, arg3;
+
+	      gcc_assert (DECL_STATIC_CHAIN (i->context));
+	      arg1 = build_addr (root->frame_decl);
+	      arg2 = build_addr (i->context);
+
+	      x = build3 (COMPONENT_REF, TREE_TYPE (field),
+			  root->frame_decl, field, NULL_TREE);
+	      arg3 = build_addr (x);
+
+	      x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_CREATED);
+	      stmt = gimple_build_call (x, 3, arg1, arg2, arg3);
+	      gimple_seq_add_stmt (&stmt_list, stmt);
+
+	      /* This call to delete the nested function trampoline is added to
+		 the cleanup list, and called when we exit the current scope.  */
+	      x = builtin_decl_implicit (BUILT_IN_NESTED_PTR_DELETED);
+	      stmt = gimple_build_call (x, 0);
+	      gimple_seq_add_stmt (&cleanup_list, stmt);
+	    }
+	  else
+	    {
+	      /* Original code to initialise the on stack trampoline.  */
+	      x = builtin_decl_implicit (BUILT_IN_INIT_TRAMPOLINE);
+	      stmt = build_init_call_stmt (root, i->context, field, x);
+	      gimple_seq_add_stmt (&stmt_list, stmt);
+	    }
 	}
     }
 
@@ -3538,11 +3596,40 @@ finalize_nesting_tree_1 (struct nesting_info *root)
   /* If we created initialization statements, insert them.  */
   if (stmt_list)
     {
-      gbind *bind;
-      annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
-      bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
-      gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
-      gimple_bind_set_body (bind, stmt_list);
+      if (flag_trampoline_impl == TRAMPOLINE_IMPL_HEAP)
+	{
+	  /* Handle off-stack trampolines.  */
+	  gbind *bind;
+	  annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
+	  annotate_all_with_location (cleanup_list, DECL_SOURCE_LOCATION (context));
+	  bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
+	  gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
+
+	  gimple_seq xxx_list = NULL;
+
+	  if (cleanup_list != NULL)
+	    {
+	      /* Maybe we shouldn't be creating this try/finally if -fno-exceptions is
+		 in use.  If this is the case, then maybe we should, instead, be
+		 inserting the cleanup code onto every path out of this function?  Not
+		 yet figured out how we would do this.  */
+	      gtry *t = gimple_build_try (stmt_list, cleanup_list, GIMPLE_TRY_FINALLY);
+	      gimple_seq_add_stmt (&xxx_list, t);
+	    }
+	  else
+	    xxx_list = stmt_list;
+
+	  gimple_bind_set_body (bind, xxx_list);
+	}
+      else
+	{
+	  /* The traditional, on stack trampolines.  */
+	  gbind *bind;
+	  annotate_all_with_location (stmt_list, DECL_SOURCE_LOCATION (context));
+	  bind = gimple_seq_first_stmt_as_a_bind (gimple_body (context));
+	  gimple_seq_add_seq (&stmt_list, gimple_bind_body (bind));
+	  gimple_bind_set_body (bind, stmt_list);
+	}
     }
 
   /* If a chain_decl was created, then it needs to be registered with
diff --git a/gcc/tree.cc b/gcc/tree.cc
index f7bfd9e3451b..f9fa7b78ffff 100644
--- a/gcc/tree.cc
+++ b/gcc/tree.cc
@@ -9922,6 +9922,23 @@ build_common_builtin_nodes (void)
 			"__builtin_nonlocal_goto",
 			ECF_NORETURN | ECF_NOTHROW);
 
+  tree ptr_ptr_type_node = build_pointer_type (ptr_type_node);
+
+  ftype = build_function_type_list (void_type_node,
+				    ptr_type_node, // void *chain
+				    ptr_type_node, // void *func
+				    ptr_ptr_type_node, // void **dst
+				    NULL_TREE);
+  local_define_builtin ("__builtin_nested_func_ptr_created", ftype,
+			BUILT_IN_NESTED_PTR_CREATED,
+			"__builtin_nested_func_ptr_created", ECF_NOTHROW);
+
+  ftype = build_function_type_list (void_type_node,
+				    NULL_TREE);
+  local_define_builtin ("__builtin_nested_func_ptr_deleted", ftype,
+			BUILT_IN_NESTED_PTR_DELETED,
+			"__builtin_nested_func_ptr_deleted", ECF_NOTHROW);
+
   ftype = build_function_type_list (void_type_node,
 				    ptr_type_node, ptr_type_node, NULL_TREE);
   local_define_builtin ("__builtin_setjmp_setup", ftype,

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-10-22 13:01 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-10-22 13:01 [gcc r14-4821] core: Support heap-based trampolines Iain D Sandoe

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).