public inbox for systemtap@sourceware.org
 help / color / mirror / Atom feed
* [PR6580; patch for review] first step of unwind_cache enhancement
       [not found] <293195834.4565580.1346769465759.JavaMail.root@redhat.com>
@ 2012-09-05 15:43 ` Serguei Makarov
  2012-09-06 22:17   ` Frank Ch. Eigler
                     ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Serguei Makarov @ 2012-09-05 15:43 UTC (permalink / raw)
  To: systemtap; +Cc: Frank Ch. Eigler, Mark Wielaard

[-- Attachment #1: Type: text/plain, Size: 567 bytes --]

The first incremental step to the runtime tweaks required to sanely implement PR6580. Namely, the kernel-side backtrace can be unwound one step at a time using _stp_stack_kernel_get, with the already unwound portion of the backtrace cached within the probe context.

This allows us to implement stack(n) (which returns the PC n levels deep within the stack) without having to resort to tokenizing a backtrace string each time.

The patch itself seems to be sound, but testing it drew attention to memory corruption errors in the existing DWARF unwinder (see PR14546).

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: uwcache.patch --]
[-- Type: text/x-patch; name=uwcache.patch, Size: 12177 bytes --]

diff --git a/runtime/common_probe_context.h b/runtime/common_probe_context.h
index df07b90..61c1b26 100644
--- a/runtime/common_probe_context.h
+++ b/runtime/common_probe_context.h
@@ -118,5 +118,12 @@ cycles_t cycles_sum;
 
 /* Current state of the unwinder (as used in the unwind.c dwarf unwinder). */
 #if defined(STP_NEED_UNWIND_DATA)
-struct unwind_context uwcontext;
+// Invariants (uwcontext currently refers to uwcontext_kernel):
+//   uwcache.depth == 0 --> uwcontext needs to be initialized
+//   uwcache.depth > 0  --> uwcontext is state after uwcache.depth unwind steps
+//   uwcache.depth >= n --> uwcache.data[n] contains PC at depth n
+//     EXCEPT WHEN n == 0, uwcache.data[n] == 0 (current PC not yet retrieved)
+struct unwind_cache uwcache;
+struct unwind_context uwcontext_user;
+struct unwind_context uwcontext_kernel;
 #endif
diff --git a/runtime/stack-dwarf.c b/runtime/stack-dwarf.c
index 9c55997..b00ac1d 100644
--- a/runtime/stack-dwarf.c
+++ b/runtime/stack-dwarf.c
@@ -35,6 +35,55 @@ static int _stp_valid_pc_addr(unsigned long addr, struct task_struct *tsk)
 #endif
 }
 
+/* NB: Returns 0 for depth == 0 -- DWARF unwinder does not fetch
+ * current PC.  Moreover, uwcontext is assumed to have been
+ * initialized by the caller. */
+unsigned long __stp_dwarf_stack_kernel_get(struct pt_regs *regs, int depth,
+                                           struct unwind_context *uwcontext,
+                                           struct unwind_cache *uwcache)
+{
+        struct unwind_frame_info *info = &uwcontext->info;
+        int ret;
+  
+        /* check if answer is already defined in cache; this is probably
+         * redundant, being already handled for us by the caller */
+        if (unlikely(uwcache->depth >= depth))
+                return uwcache->data[depth];
+
+        /* check if unwind cannot proceed further */
+        if (uwcache->done)
+                return 0;
+
+        if (uwcache->depth == 0) /* need to initialize uwcontext->info */
+                arch_unw_init_frame_info(info, regs, 0);
+
+        dbug_unwind(1, "RESUMING from %d for %d more levels",
+                    uwcache->depth, depth - uwcache->depth);
+
+        while (uwcache->depth < depth) {
+                ret = unwind(uwcontext, 0);
+                dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
+
+                /* check if unwind hit an error */
+                if (ret || ! _stp_valid_pc_addr(UNW_PC(info), NULL)) {
+                        uwcache->done = 1; break;
+                }
+
+                uwcache->depth++;
+                uwcache->data[uwcache->depth] = UNW_PC(info);
+
+                if (UNW_PC(info) == _stp_kretprobe_trampoline) {
+                        uwcache->done = 1; break;
+                        /* XXX: if there a way to unwind across kretprobe trampolines? PR9999 */
+                }
+        }
+
+        /* unwind may not have reached the desired depth */
+        return uwcache->depth >= depth ? uwcache->data[depth] : 0;    
+}
+
+
+// XXX: rewrite in terms of _get
 static void __stp_dwarf_stack_kernel_print(struct pt_regs *regs, int verbose,
 			      int levels, struct unwind_context *uwcontext)
 {
diff --git a/runtime/stack.c b/runtime/stack.c
index 413ef1e..1834298 100644
--- a/runtime/stack.c
+++ b/runtime/stack.c
@@ -165,7 +165,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
       else if (c->uregs != NULL && c->kregs != NULL
 	       && ! (c->probe_flags & _STP_PROBE_STATE_USER_MODE))
 	{
-	  struct unwind_frame_info *info = &c->uwcontext.info;
+	  struct unwind_frame_info *info = &c->uwcontext_user.info;
 	  int ret = 0;
 	  int levels;
 
@@ -189,7 +189,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 	  while (levels > 0 && ret == 0 && UNW_PC(info) != REG_IP(c->uregs))
 	    {
 	      levels--;
-	      ret = unwind(&c->uwcontext, 0);
+	      ret = unwind(&c->uwcontext_user, 0);
 	      dbug_unwind(1, "unwind levels: %d, ret: %d, pc=0x%lx\n",
 			  levels, ret, UNW_PC(info));
 	    }
@@ -213,6 +213,66 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
   return c->uregs;
 }
 
+/** Returns the PC at given depth in the stack.
+ * @param depth Depth to which to descend in the stack. Depth 0 gives the current PC.
+ *
+ * Unwinds the backtrace at most once and uses a cache for subsequent
+ * queries.  Returns 0 on error, or beyond the end of the visible
+ * stack.
+ */
+static unsigned long _stp_stack_kernel_get(struct context *c, unsigned depth)
+{
+        /* TODOXXX add appropriate dbug_unwind messages */
+        struct pt_regs *regs = NULL;
+
+        if (depth >= MAXBACKTRACE)
+                return 0; /* special case which cannot query the cache */
+
+        /* We always start by fetching the current PC if it's not yet known. */
+        if (c->uwcache.depth == 0 && c->uwcache.data[0] == 0) {
+                if (! c->kregs) {
+                        /* Even the current PC is unknown; so we have absolutely no data
+                         * at any depth. Note that unlike _stp_stack_kernel_print(), we
+                         * can't fall back to calling dump_trace() to obtain the
+                         * backtrace. */
+                        c->uwcache.done = 1; return 0;
+                } else if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE
+                           && c->ips.krp.pi) {
+                        c->uwcache.data[0] = (unsigned long)_stp_ret_addr_r(c->ips.krp.pi);
+                } else {
+                        c->uwcache.data[0] = REG_IP(c->kregs);
+                }
+        }
+
+        /* check if answer is already defined in cache */
+        if (c->uwcache.depth >= depth)
+                return c->uwcache.data[depth];
+
+        /* check if unwind cannot proceed further */
+        if (c->uwcache.done)
+                return 0;
+
+#ifdef STP_USE_DWARF_UNWINDER
+        if (unlikely (! c->kregs)) { /* XXX a bit of extra paranoia */
+                c->uwcache.done = 1; return 0;
+        }
+        regs = c->kregs;
+
+        if (c->uwcache.depth == 0) { /* need to initialize uwcontext */
+                if (c->uwcache.depth == 0 && c->uregs == &c->uwcontext_kernel.info.regs) {
+                        /* Unwinder needs the reg state, clear uregs ref. */
+                        c->uregs = NULL;
+                        c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+                }
+        }
+        return __stp_dwarf_stack_kernel_get(regs, depth, &c->uwcontext_kernel, &c->uwcache);
+#else
+        /* don't use arch specific fallbacks */
+        c->uwcache.done = 1;
+        return 0;
+#endif
+}
+
 /** Prints the stack backtrace
  * @param regs A pointer to the struct pt_regs.
  * @param verbose _STP_SYM_FULL or _STP_SYM_BRIEF
@@ -220,10 +280,14 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 
 static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 {
-	struct pt_regs *regs = NULL;
+        unsigned n, remaining;
+        unsigned long l;
+
+        if (! c->kregs) {
+                /* This is a fatal block for _stp_stack_kernel_get,
+                   but when printing a backtrace we can use this
+                   inexact fallback.
 
-	if (! c->kregs) {
-		/* For the kernel we can use an inexact fallback.
 		   When compiled with frame pointers we can do
 		   a pretty good guess at the stack value,
 		   otherwise let dump_stack guess it
@@ -248,36 +312,33 @@ static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 			_stp_print("\n");
 #endif
 		return;
-	} else {
-		regs = c->kregs;
-	}
+        }
+
+        /* print the current address */
+        if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi
+            && (sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
+                _stp_print("Returning from: ");
+                _stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
+                                sym_flags, NULL);
+                _stp_print("Returning to  : ");
+        }
+        _stp_print_addr(_stp_stack_kernel_get(c, 0), sym_flags, NULL);
 
-	/* print the current address */
-	if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi) {
-		if ((sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
-			_stp_print("Returning from: ");
-			_stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
-					sym_flags, NULL);
-			_stp_print("Returning to  : ");
-		}
-		_stp_print_addr((unsigned long)_stp_ret_addr_r(c->ips.krp.pi),
-				sym_flags, NULL);
-	} else {
-		_stp_print_addr(REG_IP(regs), sym_flags, NULL);
-	}
-
-	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
-		/* Unwinder needs the reg state, clear uregs ref. */
-		c->uregs = NULL;
-		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
-	}
-	__stp_dwarf_stack_kernel_print(regs, sym_flags, MAXBACKTRACE,
-				       &c->uwcontext);
+        for (n = 1; n < MAXBACKTRACE; n++) {
+                l = _stp_stack_kernel_get(c, n);
+                if (l == 0) {
+                        remaining = MAXBACKTRACE - n;
+                        _stp_stack_print_fallback(UNW_SP(&c->uwcontext_kernel.info),
+                                                  sym_flags, remaining, 0);
+                        break;
+                } else {
+                        _stp_print_addr(l, sym_flags, NULL);
+                }
+        }
 #else
 	/* Arch specific fallback for kernel backtraces. */
-	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);
+	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);       
 #endif
 }
 
@@ -326,13 +387,13 @@ static void _stp_stack_user_print(struct context *c, int sym_flags)
 
 	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
+	if (c->uregs == &c->uwcontext_user.info.regs) {
 		/* Unwinder needs the reg state, clear uregs ref. */
 		c->uregs = NULL;
 		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
 	}
 	__stp_dwarf_stack_user_print(regs, sym_flags, MAXBACKTRACE,
-				     &c->uwcontext, ri, uregs_valid);
+				     &c->uwcontext_user, ri, uregs_valid);
 #else
 	/* User stack traces only supported for arches with dwarf unwinder. */
 	if (sym_flags & _STP_SYM_SYMBOL)
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
index a11dc48..2d367cb 100644
--- a/runtime/unwind/unwind.h
+++ b/runtime/unwind/unwind.h
@@ -336,4 +336,15 @@ struct unwind_context {
 
 static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
 
+#ifndef MAXBACKTRACE
+#define MAXBACKTRACE 20
+#endif
+
+struct unwind_cache {
+  // XXX use more compact representation?
+  unsigned depth;
+  unsigned done; // if nonzero, unwind cannot proceed
+  unsigned long data[MAXBACKTRACE];
+};
+
 #endif /*_STP_UNWIND_H_*/
diff --git a/tapset/linux/context-symbols.stp b/tapset/linux/context-symbols.stp
index 78a0ae3..d488c5f 100644
--- a/tapset/linux/context-symbols.stp
+++ b/tapset/linux/context-symbols.stp
@@ -12,13 +12,19 @@
 //processor.
 // </tapsetdescription>
 
+function __stack_raw (n:long) %{ /* pragma:unwind */
+         STAP_RETVALUE = _stp_stack_kernel_get (CONTEXT, (unsigned)STAP_ARG_n);
+%}
 
 /**
  *  sfunction stack - return PC at stack unwind level n
  *  @n: number of levels to descend in the stack.
  */
 function stack:long (n:long) {
-         /* XXX this is the naive method */
+         r = __stack_raw(n);
+         if (r != 0) return r
+
+         /* fallback: parse backtrace() to go deeper in the stack */
          b = backtrace (); orig_n = n;
          sym = tokenize (b, " ");
          if (sym == "") @__context_unwind_error(orig_n);
diff --git a/tapsets.cxx b/tapsets.cxx
index 5070485..196c87d 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -183,6 +183,12 @@ common_probe_entryfn_prologue (translator_output* o, string statestr,
   o->newline() << "c->cycles_base = 0;";
   o->newline() << "#endif";
   */
+
+  o->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
+  o->newline() << "c->uwcache.depth = 0;";
+  o->newline() << "c->uwcache.done = 0;";
+  o->newline() << "c->uwcache.data[0] = 0;";
+  o->newline() << "#endif";
 }
 
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PR6580; patch for review] first step of unwind_cache enhancement
  2012-09-05 15:43 ` [PR6580; patch for review] first step of unwind_cache enhancement Serguei Makarov
@ 2012-09-06 22:17   ` Frank Ch. Eigler
  2012-09-07 14:17     ` Serguei Makarov
  2012-09-07 11:57   ` Mark Wielaard
  2012-09-13 20:01   ` [PR6580; patch for review] incremental unwind enhancement Serguei Makarov
  2 siblings, 1 reply; 6+ messages in thread
From: Frank Ch. Eigler @ 2012-09-06 22:17 UTC (permalink / raw)
  To: Serguei Makarov; +Cc: systemtap


smakarov wrote:

> diff --git a/runtime/common_probe_context.h b/runtime/common_probe_context.h
> index df07b90..61c1b26 100644
> --- a/runtime/common_probe_context.h
> +++ b/runtime/common_probe_context.h
> @@ -118,5 +118,12 @@ cycles_t cycles_sum;
>  
>  /* Current state of the unwinder (as used in the unwind.c dwarf unwinder). */
>  #if defined(STP_NEED_UNWIND_DATA)
> -struct unwind_context uwcontext;
> +// Invariants (uwcontext currently refers to uwcontext_kernel):
> +//   uwcache.depth == 0 --> uwcontext needs to be initialized
> +//   uwcache.depth > 0  --> uwcontext is state after uwcache.depth unwind steps
> +//   uwcache.depth >= n --> uwcache.data[n] contains PC at depth n
> +//     EXCEPT WHEN n == 0, uwcache.data[n] == 0 (current PC not yet retrieved)

Looking at the complexity, I wonder if we need such a cache at all.
Not many stap scripts seem to require more than one backtrace/stack
type call within them.  The opportunity for savings in the PR6580
world is mainly to avoid unwinding *deeper than necessary*, not to
avoid unwinding *repeatedly*.  (The case of caching user-regs from a
full kernel unwind is special.)


> +        /* We always start by fetching the current PC if it's not yet known. */
> +        if (c->uwcache.depth == 0 && c->uwcache.data[0] == 0) {
> +                if (! c->kregs) {
> +                        /* Even the current PC is unknown; so we have absolutely no data
> +                         * at any depth. Note that unlike _stp_stack_kernel_print(), we
> +                         * can't fall back to calling dump_trace() to obtain the
> +                         * backtrace. */
> +                        c->uwcache.done = 1; return 0;

Why can't we fall back to dump_trace in a non-printing case?  
Were you going to change the _print case to rely on this _get?


> +        if (c->uwcache.depth == 0) { /* need to initialize uwcontext */
> +                if (c->uwcache.depth == 0 && c->uregs == &c->uwcontext_kernel.info.regs) {
> +                        /* Unwinder needs the reg state, clear uregs ref. */
> +                        c->uregs = NULL;
> +                        c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;

We should have formal bitfields for this sort of thing, if we really need it.


> +function __stack_raw (n:long) %{ /* pragma:unwind */
> +         STAP_RETVALUE = _stp_stack_kernel_get (CONTEXT, (unsigned)STAP_ARG_n);
> +%}

Is there a risk here, considering that the new _stp_stack_kernel_get
doesn't check its 'n' parameter super well?


> +  o->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
> +  o->newline() << "c->uwcache.depth = 0;";
> +  o->newline() << "c->uwcache.done = 0;";
> +  o->newline() << "c->uwcache.data[0] = 0;";
> +  o->newline() << "#endif";

That seems like too much work to impose on all probes.


- FChE

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PR6580; patch for review] first step of unwind_cache enhancement
  2012-09-05 15:43 ` [PR6580; patch for review] first step of unwind_cache enhancement Serguei Makarov
  2012-09-06 22:17   ` Frank Ch. Eigler
@ 2012-09-07 11:57   ` Mark Wielaard
  2012-09-07 14:33     ` Serguei Makarov
  2012-09-13 20:01   ` [PR6580; patch for review] incremental unwind enhancement Serguei Makarov
  2 siblings, 1 reply; 6+ messages in thread
From: Mark Wielaard @ 2012-09-07 11:57 UTC (permalink / raw)
  To: Serguei Makarov; +Cc: systemtap, Frank Ch. Eigler

On Wed, 2012-09-05 at 11:43 -0400, Serguei Makarov wrote:
> The first incremental step to the runtime tweaks required to sanely
> implement PR6580. Namely, the kernel-side backtrace can be unwound one
> step at a time using _stp_stack_kernel_get, with the already unwound
> portion of the backtrace cached within the probe context.
> 
> This allows us to implement stack(n) (which returns the PC n levels
> deep within the stack) without having to resort to tokenizing a
> backtrace string each time.

Yeah, the tokenizing is really not a very efficient representation.

> The patch itself seems to be sound, but testing it drew attention to
> memory corruption errors in the existing DWARF unwinder (see PR14546).

Thanks for the testcase, this is hopefully resolved with commit 8f095f.




> diff --git a/runtime/common_probe_context.h
> b/runtime/common_probe_context.h
> index df07b90..61c1b26 100644
> --- a/runtime/common_probe_context.h
> +++ b/runtime/common_probe_context.h
> @@ -118,5 +118,12 @@ cycles_t cycles_sum;
>  
>  /* Current state of the unwinder (as used in the unwind.c dwarf
> unwinder). */
>  #if defined(STP_NEED_UNWIND_DATA)
> -struct unwind_context uwcontext;
> +// Invariants (uwcontext currently refers to uwcontext_kernel):
> +//   uwcache.depth == 0 --> uwcontext needs to be initialized
> +//   uwcache.depth > 0  --> uwcontext is state after uwcache.depth
> unwind steps
> +//   uwcache.depth >= n --> uwcache.data[n] contains PC at depth n
> +//     EXCEPT WHEN n == 0, uwcache.data[n] == 0 (current PC not yet
> retrieved)
> +struct unwind_cache uwcache;
> +struct unwind_context uwcontext_user;
> +struct unwind_context uwcontext_kernel;
>  #endif

An unwind_context is pretty big.
If at all possible I would only keep one.

User backtraces are a little special because depending on architecture
and probe point they might be able to start with a full set of registers
to feed to the DWARF unwinder, or might need to do a full DWARF unwind
from the current kernel probe point to the user/kernel boundary. This is
what _stp_get_uregs in stack.c does.

> +/* NB: Returns 0 for depth == 0 -- DWARF unwinder does not fetch
> + * current PC.  Moreover, uwcontext is assumed to have been
> + * initialized by the caller. */
> +unsigned long __stp_dwarf_stack_kernel_get(struct pt_regs *regs, int depth,
> +                                           struct unwind_context *uwcontext,
> +                                           struct unwind_cache *uwcache)
> +{
> +        struct unwind_frame_info *info = &uwcontext->info;
> +        int ret;
> +  
> +        /* check if answer is already defined in cache; this is probably
> +         * redundant, being already handled for us by the caller */
> +        if (unlikely(uwcache->depth >= depth))
> +                return uwcache->data[depth];
> +
> +        /* check if unwind cannot proceed further */
> +        if (uwcache->done)
> +                return 0;
> +
> +        if (uwcache->depth == 0) /* need to initialize uwcontext->info */
> +                arch_unw_init_frame_info(info, regs, 0);

Where did the caller get the regs from? It should also not just use 0
for the last argument (sanitize). That depends on whether we are doing
user unwinding and how we got the registers. See the check in
_stp_get_uregs for _stp_task_pt_regs_valid().

>  #else
>         /* Arch specific fallback for kernel backtraces. */
> -       __stp_stack_print(regs, sym_flags, MAXBACKTRACE);
> +       __stp_stack_print(regs, sym_flags, MAXBACKTRACE);       
>  #endif
>  }

Whitespace change?

> +struct unwind_cache {
> +  // XXX use more compact representation?
> +  unsigned depth;
> +  unsigned done; // if nonzero, unwind cannot proceed
> +  unsigned long data[MAXBACKTRACE];
> +};

You could use a signed value and make negative values say "done at
-depth". Frank might be right that keeping the old PC values is not of
that much use. You could just keep just the unwind_context and depth,
then check the requested depth is one beyond what we have in the context
and if not, just start from scratch again. Probably depends on some
investigation of how often callers/backtraces are reused in a single
probe handler.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PR6580; patch for review] first step of unwind_cache enhancement
  2012-09-06 22:17   ` Frank Ch. Eigler
@ 2012-09-07 14:17     ` Serguei Makarov
  0 siblings, 0 replies; 6+ messages in thread
From: Serguei Makarov @ 2012-09-07 14:17 UTC (permalink / raw)
  To: Frank Ch. Eigler, systemtap

> Looking at the complexity, I wonder if we need such a cache at all.
> Not many stap scripts seem to require more than one backtrace/stack
> type call within them.  The opportunity for savings in the PR6580
> world is mainly to avoid unwinding *deeper than necessary*, not to
> avoid unwinding *repeatedly*.  (The case of caching user-regs from a
> full kernel unwind is special.)

If we want to implement the more complex tapset functions in the script language in terms of stack(), some scheme like this is necessary. For instance, a script-language implementation of callers(n) calls stack() up to n times.

If we are to implement each tapset function in embedded-C, basically the only improvement I can see over the current git master is that instead of a string to tokenize, we might be passing back an array of PC values from doing the full backtrace.

> +        /* We always start by fetching the current PC if it's not yet known. */
> +        if (c->uwcache.depth == 0 && c->uwcache.data[0] == 0) {
> +                if (! c->kregs) {
> +                        /* Even the current PC is unknown; so we have absolutely no data
> +                         * at any depth. Note that unlike _stp_stack_kernel_print(), we
> +                         * can't fall back to calling dump_trace() to obtain the
> +                         * backtrace. */
> +                        c->uwcache.done = 1; return 0;

> Why can't we fall back to dump_trace in a non-printing case?  

Because dump_trace prints a string, the only way to use it as a fallback is to capture the string and, again, tokenize it. This is easier and safer to do on the tapset side, in the implementation of stack().

Given the confusion, I should probably make this much more explicit in comments.

> Were you going to change the _print case to rely on this _get?

The following portion of the patch I sent already does this. Note the invocation to _stp_stack_kernel_get(), and the fact that when this invocation (or its prerequisites) fail, the new code uses all of the same fallbacks as the old code (including ones corresponding to the logic in __stp_dwarf_stack_kernel_print, which is no longer invoked):

============================ BEGIN CODE =====================================

@@ -220,10 +280,14 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 
 static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 {
-	struct pt_regs *regs = NULL;
+        unsigned n, remaining;
+        unsigned long l;
+
+        if (! c->kregs) {
+                /* This is a fatal block for _stp_stack_kernel_get,
+                   but when printing a backtrace we can use this
+                   inexact fallback.
 
-	if (! c->kregs) {
-		/* For the kernel we can use an inexact fallback.
 		   When compiled with frame pointers we can do
 		   a pretty good guess at the stack value,
 		   otherwise let dump_stack guess it
@@ -248,36 +312,33 @@ static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 			_stp_print("\n");
 #endif
 		return;
-	} else {
-		regs = c->kregs;
-	}
+        }
+
+        /* print the current address */
+        if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi
+            && (sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
+                _stp_print("Returning from: ");
+                _stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
+                                sym_flags, NULL);
+                _stp_print("Returning to  : ");
+        }
+        _stp_print_addr(_stp_stack_kernel_get(c, 0), sym_flags, NULL);
 
-	/* print the current address */
-	if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi) {
-		if ((sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
-			_stp_print("Returning from: ");
-			_stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
-					sym_flags, NULL);
-			_stp_print("Returning to  : ");
-		}
-		_stp_print_addr((unsigned long)_stp_ret_addr_r(c->ips.krp.pi),
-				sym_flags, NULL);
-	} else {
-		_stp_print_addr(REG_IP(regs), sym_flags, NULL);
-	}
-
-	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
-		/* Unwinder needs the reg state, clear uregs ref. */
-		c->uregs = NULL;
-		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
-	}
-	__stp_dwarf_stack_kernel_print(regs, sym_flags, MAXBACKTRACE,
-				       &c->uwcontext);
+        for (n = 1; n < MAXBACKTRACE; n++) {
+                l = _stp_stack_kernel_get(c, n);
+                if (l == 0) {
+                        remaining = MAXBACKTRACE - n;
+                        _stp_stack_print_fallback(UNW_SP(&c->uwcontext_kernel.info),
+                                                  sym_flags, remaining, 0);
+                        break;
+                } else {
+                        _stp_print_addr(l, sym_flags, NULL);
+                }
+        }
 #else
 	/* Arch specific fallback for kernel backtraces. */
-	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);
+	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);       
 #endif
 }
 
============================ END CODE =====================================

> > +        if (c->uwcache.depth == 0) { /* need to initialize uwcontext */
> > +                if (c->uwcache.depth == 0 && c->uregs == &c->uwcontext_kernel.info.regs) {
> > +                        /* Unwinder needs the reg state, clear uregs ref. */
> > +                        c->uregs = NULL;
> > +                        c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;

> We should have formal bitfields for this sort of thing, if we really need it.

I'll look into this issue. The fact that this code will probably run into problems when we interleave user- and kernel- backtrace invocations gives additional motivation.

> > +function __stack_raw (n:long) %{ /* pragma:unwind */
> > +         STAP_RETVALUE = _stp_stack_kernel_get (CONTEXT, (unsigned)STAP_ARG_n);
> > +%}

> Is there a risk here, considering that the new _stp_stack_kernel_get
> doesn't check its 'n' parameter super well?

Good catch, thank you.

> > +  o->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
> > +  o->newline() << "c->uwcache.depth = 0;";
> > +  o->newline() << "c->uwcache.done = 0;";
> > +  o->newline() << "c->uwcache.data[0] = 0;";
> > +  o->newline() << "#endif";
>
> That seems like too much work to impose on all probes.

Agreed; the next patch I'll send (pending discussion on this one), improves this slightly by reducing the number of assignments down to one (marking the unwind cache 'uninitialized' by setting 'done' to a magic value). There's probably a much better solution I'm not seeing, though.

All the best,
       Serguei

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PR6580; patch for review] first step of unwind_cache enhancement
  2012-09-07 11:57   ` Mark Wielaard
@ 2012-09-07 14:33     ` Serguei Makarov
  0 siblings, 0 replies; 6+ messages in thread
From: Serguei Makarov @ 2012-09-07 14:33 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: systemtap, Frank Ch. Eigler

> An unwind_context is pretty big.
> If at all possible I would only keep one.

I can only think of two solutions for (potential) memory savings here:
- keep only one uwcontext; if we call a user backtrace function after a kernel one (or vice versa), then we need to erase the current uwcontext for the user side and do a kernel unwind from the beginning in the same space
- keep both uwcontexts, but allocate them dynamically when needed (blegh) rather than embedding them in the probe context struct

Perhaps there's a better solution.

> +/* NB: Returns 0 for depth == 0 -- DWARF unwinder does not fetch
> + * current PC.  Moreover, uwcontext is assumed to have been
> + * initialized by the caller. */
> +unsigned long __stp_dwarf_stack_kernel_get(struct pt_regs *regs, int depth,
> +                                           struct unwind_context *uwcontext,
> +                                           struct unwind_cache *uwcache)
> +{
> +        struct unwind_frame_info *info = &uwcontext->info;
> +        int ret;
> +  
> +        /* check if answer is already defined in cache; this is probably
> +         * redundant, being already handled for us by the caller */
> +        if (unlikely(uwcache->depth >= depth))
> +                return uwcache->data[depth];
> +
> +        /* check if unwind cannot proceed further */
> +        if (uwcache->done)
> +                return 0;
> +
> +        if (uwcache->depth == 0) /* need to initialize uwcontext->info */
> +                arch_unw_init_frame_info(info, regs, 0);

> Where did the caller get the regs from?

The caller would be _stp_stack_kernel_get, which got the regs from c->kregs(). This function works for the kernel-side unwind only; there would be a separate function *_user_get for the user-side unwind in the next patch.

> It should also not just use 0
> for the last argument (sanitize). That depends on whether we are doing
> user unwinding and how we got the registers. See the check in
> _stp_get_uregs for _stp_task_pt_regs_valid().

Alright, I'll check what to do with this. This is basically identical to how __stp_dwarf_stack_kernel_print sets things up, though.

> >  #else
> >         /* Arch specific fallback for kernel backtraces. */
> > -       __stp_stack_print(regs, sym_flags, MAXBACKTRACE);
> > +       __stp_stack_print(regs, sym_flags, MAXBACKTRACE);       
> >  #endif
> >  }

> Whitespace change?

Thanks for the catch. Swore I had my emacs indentation set up adequately for this...

> You could use a signed value and make negative values say "done at
> -depth". Frank might be right that keeping the old PC values is not of
> that much use. You could just keep just the unwind_context and depth,
> then check the requested depth is one beyond what we have in the context
> and if not, just start from scratch again. Probably depends on some
> investigation of how often callers/backtraces are reused in a single
> probe handler.

Getting rid of the cache would probably save memory, but not make the logic much less complicated. (Still, the former might be worthwhile.) The current scheme gives maximum peace of mind on the tapset side to implement everything in terms of stack() without undue overhead.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PR6580; patch for review] incremental unwind enhancement
  2012-09-05 15:43 ` [PR6580; patch for review] first step of unwind_cache enhancement Serguei Makarov
  2012-09-06 22:17   ` Frank Ch. Eigler
  2012-09-07 11:57   ` Mark Wielaard
@ 2012-09-13 20:01   ` Serguei Makarov
  2 siblings, 0 replies; 6+ messages in thread
From: Serguei Makarov @ 2012-09-13 20:01 UTC (permalink / raw)
  To: systemtap

[-- Attachment #1: Type: text/plain, Size: 1712 bytes --]

Updated version of the backtrace/unwind patch based on feedback. This one makes the cache *optional* (enabled with -DSTP_FULL_BACKTRACE_CACHE); otherwise, only the current unwind state for each of user / kernel space is retained, which is sufficient to implement PR6580 tapset functions efficiently so long as they call stack() / ustack() with increasing consecutive numbers.

Added initialization overhead for a probe prologue is now only two assignments (to set depth = 0 on both unwind states). This is probably tolerable; I could modify one of the visitors to search for /* pragma:unwind */ in tapset functions used by a probe and generate the code conditionally, but that would be severe overkill.

The patch is robust enough to pass the exelib.exp stress test, among other things. Any feedback/nitpicking would be very welcome, though.

----- Original Message -----
From: "Serguei Makarov" <smakarov@redhat.com>
To: systemtap@sourceware.org
Cc: "Frank Ch. Eigler" <fche@redhat.com>, "Mark Wielaard" <mjw@redhat.com>
Sent: Wednesday, September 5, 2012 11:43:07 AM
Subject: [PR6580; patch for review] first step of unwind_cache enhancement

The first incremental step to the runtime tweaks required to sanely implement PR6580. Namely, the kernel-side backtrace can be unwound one step at a time using _stp_stack_kernel_get, with the already unwound portion of the backtrace cached within the probe context.

This allows us to implement stack(n) (which returns the PC n levels deep within the stack) without having to resort to tokenizing a backtrace string each time.

The patch itself seems to be sound, but testing it drew attention to memory corruption errors in the existing DWARF unwinder (see PR14546).

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: uwcache.patch --]
[-- Type: text/x-patch; name=uwcache.patch, Size: 21052 bytes --]

diff --git a/runtime/common_probe_context.h b/runtime/common_probe_context.h
index df07b90..062bec1 100644
--- a/runtime/common_probe_context.h
+++ b/runtime/common_probe_context.h
@@ -118,5 +118,15 @@ cycles_t cycles_sum;
 
 /* Current state of the unwinder (as used in the unwind.c dwarf unwinder). */
 #if defined(STP_NEED_UNWIND_DATA)
-struct unwind_context uwcontext;
+// Invariants (for both _kernel and _user unwind states):
+//   uwcache.depth == 0 --> uwcontext needs to be initialized
+//   uwcache.depth > 0  --> uwcontext is state after uwcache.depth unwind
+//                          steps, where fetching current PC counts as a step
+//   uwcache.depth < 0  --> unwind finished after -uwcache.depth steps
+// ... and when STP_FULL_BACKTRACE_CACHE is enabled, we have:
+//   uwcache.depth > n  --> uwcache.data[n] contains PC at depth n
+struct unwind_cache uwcache_user;
+struct unwind_cache uwcache_kernel;
+struct unwind_context uwcontext_user;
+struct unwind_context uwcontext_kernel;
 #endif
diff --git a/runtime/stack-dwarf.c b/runtime/stack-dwarf.c
deleted file mode 100644
index 9c55997..0000000
--- a/runtime/stack-dwarf.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* -*- linux-c -*-
- * x86 stack tracing functions
- * Copyright (C) 2005-2011 Red Hat Inc.
- *
- * This file is part of systemtap, and is free software.  You can
- * redistribute it and/or modify it under the terms of the GNU General
- * Public License (GPL); either version 2, or (at your option) any
- * later version.
- */
-
-#ifdef STAPCONF_LINUX_UACCESS_H
-#include <linux/uaccess.h>
-#else
-#include <asm/uaccess.h>
-#endif
-#include <linux/types.h>
-#define intptr_t long
-#define uintptr_t unsigned long
-
-static int _stp_valid_pc_addr(unsigned long addr, struct task_struct *tsk)
-{
-	/* Just a simple check of whether the the address can be accessed
-	   as a user space address. Zero is always bad. */
-
-/* FIXME for s390x PR13350. */
-#if defined (__s390__) || defined (__s390x__)
-       return addr != 0L;
-#else
-	int ok;
-	mm_segment_t oldfs = get_fs();
-	set_fs(USER_DS);
-	ok = access_ok(VERIFY_READ, (long *) (intptr_t) addr, sizeof(long));
-	set_fs(oldfs);
-	return addr != 0L && tsk != NULL ? ok : ! ok;
-#endif
-}
-
-static void __stp_dwarf_stack_kernel_print(struct pt_regs *regs, int verbose,
-			      int levels, struct unwind_context *uwcontext)
-{
-	struct unwind_frame_info *info = &uwcontext->info;
-	arch_unw_init_frame_info(info, regs, 0);
-
-	while (levels) {
-		int ret = unwind(uwcontext, 0);
-		dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
-		if (ret == 0 && _stp_valid_pc_addr(UNW_PC(info), NULL)) {
-			_stp_print_addr(UNW_PC(info), verbose, NULL);
-			levels--;
-			if (UNW_PC(info) != _stp_kretprobe_trampoline)
-			  continue;
-		}
-		/* If an error happened or we hit a kretprobe trampoline,
-		 * and the current pc frame address is still valid kernel
-		 * address use fallback backtrace, unless user task backtrace.
-		 * FIXME: is there a way to unwind across kretprobe
-		 * trampolines? PR9999. */
-		if ((ret < 0 || UNW_PC(info) == _stp_kretprobe_trampoline))
-			_stp_stack_print_fallback(UNW_SP(info),
-						  verbose, levels, 0);
-		return;
-	}
-}
-
-
-static void __stp_dwarf_stack_user_print(struct pt_regs *regs, int verbose,
-			      int levels, struct unwind_context *uwcontext,
-			      struct uretprobe_instance *ri, int uregs_valid)
-{
-	struct unwind_frame_info *info = &uwcontext->info;
-	arch_unw_init_frame_info(info, regs, ! uregs_valid);
-
-	while (levels) {
-		int ret = unwind(uwcontext, 1);
-#ifdef STAPCONF_UPROBE_GET_PC
-                unsigned long maybe_pc = 0;
-                if (ri) {
-                        maybe_pc = uprobe_get_pc(ri, UNW_PC(info),
-                                                 UNW_SP(info));
-                        if (!maybe_pc)
-                                printk("SYSTEMTAP ERROR: uprobe_get_return returned 0\n");
-                        else
-                                UNW_PC(info) = maybe_pc;
-                }
-#endif
-		dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
-		if (ret == 0 && _stp_valid_pc_addr(UNW_PC(info), current)) {
-			_stp_print_addr(UNW_PC(info), verbose, current);
-			levels--;
-			continue;
-		}
-
-		/* If an error happened or the PC becomes invalid, then
-		 * we are done, _stp_stack_print_fallback is only for
-		 * kernel space. */
-		return;
-	}
-}
diff --git a/runtime/stack.c b/runtime/stack.c
index 413ef1e..410698f 100644
--- a/runtime/stack.c
+++ b/runtime/stack.c
@@ -46,7 +46,32 @@
 static void _stp_stack_print_fallback(unsigned long, int, int, int);
 
 #ifdef STP_USE_DWARF_UNWINDER
-#include "stack-dwarf.c"
+#ifdef STAPCONF_LINUX_UACCESS_H
+#include <linux/uaccess.h>
+#else
+#include <asm/uaccess.h>
+#endif
+#include <linux/types.h>
+#define intptr_t long
+#define uintptr_t unsigned long
+
+static int _stp_valid_pc_addr(unsigned long addr, struct task_struct *tsk)
+{
+	/* Just a simple check of whether the the address can be accessed
+	   as a user space address. Zero is always bad. */
+
+/* FIXME for s390x PR13350. */
+#if defined (__s390__) || defined (__s390x__)
+       return addr != 0L;
+#else
+	int ok;
+	mm_segment_t oldfs = get_fs();
+	set_fs(USER_DS);
+	ok = access_ok(VERIFY_READ, (long *) (intptr_t) addr, sizeof(long));
+	set_fs(oldfs);
+	return addr != 0L && tsk != NULL ? ok : ! ok;
+#endif
+}
 #endif
 
 #if defined (__ia64__)
@@ -153,6 +178,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
     c->probe_flags |= _STP_PROBE_STATE_FULL_UREGS;
   else if (c->uregs == NULL)
     {
+      dbug_unwind(1, "computing uregs\n");
       /* First try simple recovery through task_pt_regs,
 	 on some platforms that already provides complete uregs. */
       c->uregs = _stp_current_pt_regs();
@@ -165,7 +191,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
       else if (c->uregs != NULL && c->kregs != NULL
 	       && ! (c->probe_flags & _STP_PROBE_STATE_USER_MODE))
 	{
-	  struct unwind_frame_info *info = &c->uwcontext.info;
+	  struct unwind_frame_info *info = &c->uwcontext_user.info;
 	  int ret = 0;
 	  int levels;
 
@@ -189,7 +215,7 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 	  while (levels > 0 && ret == 0 && UNW_PC(info) != REG_IP(c->uregs))
 	    {
 	      levels--;
-	      ret = unwind(&c->uwcontext, 0);
+	      ret = unwind(&c->uwcontext_user, 0);
 	      dbug_unwind(1, "unwind levels: %d, ret: %d, pc=0x%lx\n",
 			  levels, ret, UNW_PC(info));
 	    }
@@ -213,6 +239,118 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
   return c->uregs;
 }
 
+
+static unsigned long _stp_stack_unwind_one_kernel(struct context *c, unsigned depth)
+{
+	struct pt_regs *regs = NULL;
+	struct unwind_frame_info *info = NULL;
+	int ret;
+
+	if (depth == 0) { /* Start by fetching the current PC. */
+		dbug_unwind(1, "STARTING kernel unwind\n");
+
+		if (! c->kregs) {
+			/* Even the current PC is unknown; so we have
+			 * absolutely no data at any depth.
+			 *
+			 * Note that unlike _stp_stack_kernel_print(),
+			 * we can't fall back to calling dump_trace()
+			 * to obtain the backtrace -- since that
+			 * returns a string, which we would have to
+			 * tokenize. Callers that want to use the
+			 * dump_trace() fallback should call
+			 * _stp_stack_kernel_print() and do their own
+			 * tokenization of the result. */
+			return 0;
+		} else if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE
+			   && c->ips.krp.pi) {
+			return (unsigned long)_stp_ret_addr_r(c->ips.krp.pi);
+		} else {
+			return REG_IP(c->kregs);
+		}
+	}
+
+#ifdef STP_USE_DWARF_UNWINDER
+	/* Otherwise, use the DWARF unwinder to unwind one step. */
+	if (! c->kregs) {
+		return 0;
+	}
+	regs = c->kregs;
+
+	info = &c->uwcontext_kernel.info;
+
+	dbug_unwind(1, "CONTINUING kernel unwind to depth %d\n", depth);
+
+	if (depth == 1) { /* need to clear uregs & set up uwcontext->info */
+		if (c->uregs == &c->uwcontext_kernel.info.regs) {
+			dbug_unwind(1, "clearing uregs\n");
+			/* Unwinder needs the reg state, clear uregs ref. */
+			c->uregs = NULL;
+			c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+		}
+
+		arch_unw_init_frame_info(info, regs, 0);
+	}
+
+	ret = unwind(&c->uwcontext_kernel, 0);
+	dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
+
+	/* check if unwind hit an error */
+	if (ret || ! _stp_valid_pc_addr(UNW_PC(info), NULL)) {
+		return 0;
+	}
+
+	return UNW_PC(info);
+#else
+	return 0;
+#endif
+}
+
+static unsigned long _stp_stack_kernel_get(struct context *c, unsigned depth)
+{
+	if (unlikely(depth >= MAXBACKTRACE)) {
+		return 0;
+	}
+
+	/* Reset uwcontext if necessary */
+	if (c->uwcache_kernel.depth < 0) { /* unwind finished earlier */
+		if (depth < 0 - c->uwcache_kernel.depth) {
+#ifdef STP_FULL_BACKTRACE_CACHE
+			return c->uwcache_kernel.data[depth];
+#else
+			c->uwcache_kernel.depth = 0;
+#endif
+		} else {
+			return 0; /* unwind does not reach this far */
+		}
+	} else if (c->uwcache_kernel.depth > depth) {
+#ifdef STP_FULL_BACKTRACE_CACHE
+		return c->uwcache_kernel.data[depth];
+#else
+		c->uwcache_kernel.depth = 0;
+#endif
+	}
+
+	/* Advance uwcontext to the required depth. */
+	while (c->uwcache_kernel.depth <= depth) {
+		c->uwcache_kernel.pc = _stp_stack_unwind_one_kernel(c, c->uwcache_kernel.depth);
+#ifdef STP_FULL_BACKTRACE_CACHE
+		c->uwcache_kernel.data[c->uwcache_kernel.depth] = c->uwcache_kernel.pc;
+#endif
+		if (c->uwcache_kernel.pc == 0
+			|| c->uwcache_kernel.pc == _stp_kretprobe_trampoline) {
+			/* Mark unwind completed. */
+			c->uwcache_kernel.depth = -c->uwcache_kernel.depth;
+			break;
+			/* XXX: is there a way to unwind across kretprobe trampolines? PR9999 */
+		}
+		c->uwcache_kernel.depth ++;
+	}
+
+	/* Return the program counter at the current depth. */
+	return c->uwcache_kernel.pc;
+}
+
 /** Prints the stack backtrace
  * @param regs A pointer to the struct pt_regs.
  * @param verbose _STP_SYM_FULL or _STP_SYM_BRIEF
@@ -220,14 +358,18 @@ static struct pt_regs *_stp_get_uregs(struct context *c)
 
 static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 {
-	struct pt_regs *regs = NULL;
+	unsigned n, remaining;
+	unsigned long l;
 
 	if (! c->kregs) {
-		/* For the kernel we can use an inexact fallback.
-		   When compiled with frame pointers we can do
-		   a pretty good guess at the stack value,
-		   otherwise let dump_stack guess it
-		   (and skip some framework frames). */
+		/* This is a fatal block for _stp_stack_kernel_get,
+		 * but when printing a backtrace we can use this
+		 * inexact fallback.
+		 *
+		 * When compiled with frame pointers we can do
+		 * a pretty good guess at the stack value,
+		 * otherwise let dump_stack guess it
+		 * (and skip some framework frames). */
 #if defined(STAPCONF_KERNEL_STACKTRACE) || defined(STAPCONF_KERNEL_STACKTRACE_NO_BP)
 		unsigned long sp;
 		int skip;
@@ -248,44 +390,46 @@ static void _stp_stack_kernel_print(struct context *c, int sym_flags)
 			_stp_print("\n");
 #endif
 		return;
-	} else {
-		regs = c->kregs;
 	}
 
 	/* print the current address */
-	if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi) {
-		if ((sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
-			_stp_print("Returning from: ");
-			_stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
-					sym_flags, NULL);
-			_stp_print("Returning to  : ");
-		}
-		_stp_print_addr((unsigned long)_stp_ret_addr_r(c->ips.krp.pi),
+	if (c->probe_type == _STP_PROBE_HANDLER_KRETPROBE && c->ips.krp.pi
+	    && (sym_flags & _STP_SYM_FULL) == _STP_SYM_FULL) {
+		_stp_print("Returning from: ");
+		_stp_print_addr((unsigned long)_stp_probe_addr_r(c->ips.krp.pi),
 				sym_flags, NULL);
-	} else {
-		_stp_print_addr(REG_IP(regs), sym_flags, NULL);
+		_stp_print("Returning to  : ");
 	}
+	_stp_print_addr(_stp_stack_kernel_get(c, 0), sym_flags, NULL);
 
-	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
-		/* Unwinder needs the reg state, clear uregs ref. */
-		c->uregs = NULL;
-		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+	for (n = 1; n < MAXBACKTRACE; n++) {
+		l = _stp_stack_kernel_get(c, n);
+		if (l == 0) {
+			remaining = MAXBACKTRACE - n;
+			_stp_stack_print_fallback(UNW_SP(&c->uwcontext_kernel.info),
+						  sym_flags, remaining, 0);
+			break;
+		} else {
+			_stp_print_addr(l, sym_flags, NULL);
+		}
 	}
-	__stp_dwarf_stack_kernel_print(regs, sym_flags, MAXBACKTRACE,
-				       &c->uwcontext);
 #else
 	/* Arch specific fallback for kernel backtraces. */
 	__stp_stack_print(regs, sym_flags, MAXBACKTRACE);
 #endif
 }
 
-static void _stp_stack_user_print(struct context *c, int sym_flags)
+static unsigned long _stp_stack_unwind_one_user(struct context *c, unsigned depth)
 {
 	struct pt_regs *regs = NULL;
 	int uregs_valid = 0;
 	struct uretprobe_instance *ri = NULL;
+	struct unwind_frame_info *info = NULL;
+	int ret;
+#ifdef STAPCONF_UPROBE_GET_PC
+	unsigned long maybe_pc;
+#endif
 
 	if (c->probe_type == _STP_PROBE_HANDLER_URETPROBE)
 		ri = c->ips.ri;
@@ -294,9 +438,127 @@ static void _stp_stack_user_print(struct context *c, int sym_flags)
 		ri = GET_PC_URETPROBE_NONE;
 #endif
 
+	/* XXX: The computation that gives this is cached, so calling
+	 * _stp_get_uregs multiple times is okay... probably. */
 	regs = _stp_get_uregs(c);
 	uregs_valid = c->probe_flags & _STP_PROBE_STATE_FULL_UREGS;
 
+	if (! current->mm || ! regs)
+		return 0; // no user backtrace at this probe point
+
+	if (depth == 0) { /* Start by fetching the current PC. */
+		dbug_unwind(1, "STARTING user unwind\n");
+
+#ifdef STAPCONF_UPROBE_GET_PC
+		if (c->probe_type == _STP_PROBE_HANDLER_URETPROBE && ri) {
+			return ri->ret_addr;
+		} else {
+			return REG_IP(regs);
+		}
+#else
+		return REG_IP(regs);
+#endif
+	}
+
+#ifdef STP_USE_DWARF_UNWINDER
+	info = &c->uwcontext_user.info;
+
+	dbug_unwind(1, "CONTINUING user unwind to depth %d\n", depth);
+
+	if (depth == 1) { /* need to clear uregs & set up uwcontext->info */
+		if (c->uregs == &c->uwcontext_user.info.regs) {
+			dbug_unwind(1, "clearing uregs\n");
+			/* Unwinder needs the reg state, clear uregs ref. */
+			c->uregs = NULL;
+			c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+		}
+
+		arch_unw_init_frame_info(info, regs, 0);
+	}
+
+	ret = unwind(&c->uwcontext_user, 1);
+#ifdef STAPCONF_UPROBE_GET_PC
+	maybe_pc = 0;
+	if (ri) {
+		maybe_pc = uprobe_get_pc(ri, UNW_PC(info), UNW_SP(info));
+		if (!maybe_pc)
+			printk("SYSTEMTAP ERROR: uprobe_get_return returned 0\n");
+		else
+			UNW_PC(info) = maybe_pc;
+	}
+#endif
+	dbug_unwind(1, "ret=%d PC=%lx SP=%lx\n", ret, UNW_PC(info), UNW_SP(info));
+
+	/* check if unwind hit an error */
+	if (ret || ! _stp_valid_pc_addr(UNW_PC(info), current)) {
+		return 0;
+	}
+
+	return UNW_PC(info);
+#else
+	/* User stack traces only supported for arches with dwarf unwinder. */
+	return 0;
+#endif
+}
+
+static unsigned long _stp_stack_user_get(struct context *c, unsigned depth)
+{
+	if (unlikely(depth >= MAXBACKTRACE)) {
+		return 0;
+	}
+
+	/* Reset uwcontext if necessary */
+	if (c->uwcache_user.depth < 0) { /* unwind finished earlier */
+		if (depth < 0 - c->uwcache_user.depth) {
+#ifdef STP_FULL_BACKTRACE_CACHE
+			return c->uwcache_user.data[depth];
+#else
+			c->uwcache_user.depth = 0;
+#endif
+		} else {
+			return 0; /* unwind does not reach this far */
+		}
+	} else if (c->uwcache_user.depth > depth) {
+#ifdef STP_FULL_BACKTRACE_CACHE
+		return c->uwcache_user.data[depth];
+#else
+		c->uwcache_user.depth = 0;
+#endif
+	}
+
+	/* Advance uwcontext to the required depth. */
+	while (c->uwcache_user.depth <= depth) {
+		c->uwcache_user.pc = _stp_stack_unwind_one_user(c, c->uwcache_user.depth);
+#ifdef STP_FULL_BACKTRACE_CACHE
+		c->uwcache_user.data[c->uwcache_user.depth] = c->uwcache_user.pc;
+#endif
+		if (c->uwcache_user.pc == 0) {
+			/* Mark unwind completed. */
+			c->uwcache_user.depth = -c->uwcache_user.depth;
+			break;
+		}
+		c->uwcache_user.depth ++;
+	}
+
+	/* Return the program counter at the current depth. */
+	return c->uwcache_user.pc;
+}
+
+static void _stp_stack_user_print(struct context *c, int sym_flags)
+{
+	struct pt_regs *regs = NULL;
+	struct uretprobe_instance *ri = NULL;
+	unsigned n; unsigned long l;
+
+	if (c->probe_type == _STP_PROBE_HANDLER_URETPROBE)
+		ri = c->ips.ri;
+#ifdef STAPCONF_UPROBE_GET_PC
+	else if (c->probe_type == _STP_PROBE_HANDLER_UPROBE)
+		ri = GET_PC_URETPROBE_NONE;
+#endif
+
+	regs = _stp_get_uregs(c);
+
 	if (! current->mm || ! regs) {
 		if (sym_flags & _STP_SYM_SYMBOL)
 			_stp_printf("<no user backtrace at %s>\n",
@@ -314,25 +576,18 @@ static void _stp_stack_user_print(struct context *c, int sym_flags)
 			/* ... otherwise this dereference fails */
 			_stp_print_addr(ri->rp->u.vaddr, sym_flags, current);
 			_stp_print("Returning to  : ");
-			_stp_print_addr(ri->ret_addr, sym_flags, current);
-		} else
-			_stp_print_addr(ri->ret_addr, sym_flags, current);
-	} else {
-		_stp_print_addr(REG_IP(regs), sym_flags, current);
+		}
 	}
-#else
-	_stp_print_addr(REG_IP(regs), sym_flags, current);
 #endif
+	_stp_print_addr(_stp_stack_user_get(c, 0), sym_flags, current);
 
 	/* print rest of stack... */
 #ifdef STP_USE_DWARF_UNWINDER
-	if (c->uregs == &c->uwcontext.info.regs) {
-		/* Unwinder needs the reg state, clear uregs ref. */
-		c->uregs = NULL;
-		c->probe_flags &= ~_STP_PROBE_STATE_FULL_UREGS;
+	for (n = 1; n < MAXBACKTRACE; n++) {
+		l = _stp_stack_user_get(c, n);
+		if (l == 0) break; // No user space fallback available
+		_stp_print_addr(l, sym_flags, current);
 	}
-	__stp_dwarf_stack_user_print(regs, sym_flags, MAXBACKTRACE,
-				     &c->uwcontext, ri, uregs_valid);
 #else
 	/* User stack traces only supported for arches with dwarf unwinder. */
 	if (sym_flags & _STP_SYM_SYMBOL)
diff --git a/runtime/unwind/unwind.h b/runtime/unwind/unwind.h
index a11dc48..594e9a8 100644
--- a/runtime/unwind/unwind.h
+++ b/runtime/unwind/unwind.h
@@ -336,4 +336,16 @@ struct unwind_context {
 
 static const struct cfa badCFA = { ARRAY_SIZE(reg_info), 1 };
 
+#ifndef MAXBACKTRACE
+#define MAXBACKTRACE 20
+#endif
+
+struct unwind_cache {
+    int depth; /* if < 0, unwind finished at -depth; if 0, context not set up */
+    unsigned long pc;
+#if defined(STP_FULL_BACKTRACE_CACHE)
+    unsigned long data[MAXBACKTRACE];
+#endif
+};
+
 #endif /*_STP_UNWIND_H_*/
diff --git a/tapset/linux/context-symbols.stp b/tapset/linux/context-symbols.stp
index e574aed..dafad17 100644
--- a/tapset/linux/context-symbols.stp
+++ b/tapset/linux/context-symbols.stp
@@ -12,13 +12,24 @@
 //processor.
 // </tapsetdescription>
 
+function __stack_raw (n:long) %{ /* pragma:unwind */
+         /* basic sanity check for bounds: */
+         if (unlikely(STAP_ARG_n < 0 || STAP_ARG_n >= MAXBACKTRACE))
+                  STAP_RETVALUE = 0;
+         else
+                  STAP_RETVALUE = _stp_stack_kernel_get (CONTEXT, (unsigned)STAP_ARG_n);
+%}
 
 /**
  *  sfunction stack - return PC at stack unwind level n
  *  @n: number of levels to descend in the stack.
  */
 function stack:long (n:long) {
-         /* XXX this is the naive method */
+         if (n < 0 || n >= %{ MAXBACKTRACE %}) return 0
+         r = __stack_raw(n);
+         if (r != 0) return r
+
+         /* fallback: parse backtrace() to go deeper in the stack */
          b = backtrace (); orig_n = n;
          sym = tokenize (b, " ");
          if (sym == "") @__context_unwind_error(orig_n);
diff --git a/tapset/linux/ucontext-symbols.stp b/tapset/linux/ucontext-symbols.stp
index 33d5a36..da642e0 100644
--- a/tapset/linux/ucontext-symbols.stp
+++ b/tapset/linux/ucontext-symbols.stp
@@ -13,12 +13,23 @@
 // the function symbol of an address.
 // </tapsetdescription>
 
+function __ustack_raw (n:long) %{ /* pragma:unwind */
+         /* basic sanity check for bounds: */
+         if (unlikely(STAP_ARG_n < 0 || STAP_ARG_n >= MAXBACKTRACE))
+                  STAP_RETVALUE = 0;
+         else
+                  STAP_RETVALUE = _stp_stack_user_get (CONTEXT, (unsigned)STAP_ARG_n);
+%}
+
 /**
  *  sfunction ustack - return PC at user-space stack unwind level n
  *  @n: number of levels to descend in the stack.
  */
 function ustack:long (n:long) {
-         /* XXX this is the naive method */
+         r = __ustack_raw(n);
+         if (r != 0) return r
+
+         /* fallback: parse backtrace() to go deeper in the stack */
          b = ubacktrace (); orig_n = n;
          sym = tokenize (b, " ");
          if (sym == "") @__context_unwind_error(orig_n);
diff --git a/tapsets.cxx b/tapsets.cxx
index 5070485..2a9ff3d 100644
--- a/tapsets.cxx
+++ b/tapsets.cxx
@@ -183,6 +183,11 @@ common_probe_entryfn_prologue (translator_output* o, string statestr,
   o->newline() << "c->cycles_base = 0;";
   o->newline() << "#endif";
   */
+
+  o->newline() << "#if defined(STP_NEED_UNWIND_DATA)";
+  o->newline() << "c->uwcache_user.depth = 0;";
+  o->newline() << "c->uwcache_kernel.depth = 0;";
+  o->newline() << "#endif";
 }
 
 

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-09-13 20:01 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <293195834.4565580.1346769465759.JavaMail.root@redhat.com>
2012-09-05 15:43 ` [PR6580; patch for review] first step of unwind_cache enhancement Serguei Makarov
2012-09-06 22:17   ` Frank Ch. Eigler
2012-09-07 14:17     ` Serguei Makarov
2012-09-07 11:57   ` Mark Wielaard
2012-09-07 14:33     ` Serguei Makarov
2012-09-13 20:01   ` [PR6580; patch for review] incremental unwind enhancement Serguei Makarov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).