On Mon, Mar 07, 2022 at 10:49:47AM +0000, Pedro Alves wrote:
> On 2022-03-03 11:22, Stefan Hajnoczi wrote:
> > Hi,
> > The QEMU emulator uses coroutines with separate stacks. It can be
> > challenging to debug coroutines that have yielded because GDB is not
> > aware of them (no thread is currently executing them).
> >
> > QEMU has a GDB Python script that helps. It "creates" a stack frame for
> > a given coroutine by temporarily setting register values and then using
> > the "bt" command. This works on a live process under ptrace control but
> > not for coredumps where registers can't be set.
> >
> > Here is the script (or see the bottom of this email for an inline copy
> > of the relevant code):
> > https://gitlab.com/qemu-project/qemu/-/blob/master/scripts/qemugdb/coroutine.py
> >
> > I hoped that "select-frame address ADDRESS" could be used instead so
> > this would work on coredumps too. Unfortunately "select-frame" only
> > searches stack frames that GDB is already aware of, so it cannot be used
> > to backtrace coroutine stacks.
> >
> > Is there a way to backtrace a stack at an arbitrary address in GDB?
>
> I don't think there's an easy/great answer. Maybe it could
> be done with a Python unwinder [1]? See gdb.python/py-unwind-user-regs.py
> in the GDB testsuite for an example you could probably start with.

I tried writing an unwinder that returns the topmost coroutine stack
frame. "info threads" + "bt" shows the main stack though:

(gdb) qemu coroutine 0x55be3c592120
  Id   Target Id                         Frame
* 1    Thread 0x7f7abbdd4f00 (LWP 58989) Returning a frame with rip 0x55be3ae19ff4
0x00007f7abcd2489e in __ppoll (fds=0x21, nfds=6717500806073509987, timeout=<optimized out>, sigmask=0x1f000) at ../sysdeps/unix/sysv/linux/ppoll.c:43
...
#0  0x00007f7abcd2489e in __ppoll (fds=0x55be3c78a9f0, nfds=43, timeout=<optimized out>, timeout@entry=0x7ffef27cc040, sigmask=sigmask@entry=0x0) at ../sysdeps/unix/sysv/linux/ppoll.c:43
#1  0x000055be3ae26435 in ppoll (__ss=0x0, __timeout=0x7ffef27cc040, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/bits/poll2.h:81

I was hoping that frame #1 would be the coroutine stack since the debug
message "Returning a frame with rip 0x55be3ae19ff4" shows the unwinder
was invoked.

I've included the code below in case anyone has suggestions for making
the unwinder work. See bt_jmpbuf() and the Unwinder class.

The idea is that bt_jmpbuf() passes the registers of the coroutine to the
unwinder and invokes "info threads" + "bt". The unwinder only returns a
stack frame the first time it's invoked. It cannot unwind successive stack
frames so it disables itself after returning the topmost one (I was hoping
GDB's built-in unwinder would take over from there).

Thanks,
Stefan

---
#
# GDB debugging support
#
# Copyright 2012 Red Hat, Inc. and/or its affiliates
#
# Authors:
#  Avi Kivity
#
# This work is licensed under the terms of the GNU GPL, version 2
# or later.  See the COPYING file in the top-level directory.
import gdb
import gdb.unwinder

VOID_PTR = gdb.lookup_type('void').pointer()


class FrameId(object):
    '''Frame identifier passed to create_unwind_info(): an sp/pc pair.'''

    def __init__(self, sp, pc):
        self.sp = sp
        self.pc = pc


class Unwinder(gdb.unwinder.Unwinder):
    '''One-shot unwinder that reports a coroutine's saved registers.

    The unwinder does nothing until arm() hands it a register dict. The
    next time GDB calls it, it returns unwind info built from those
    registers and disarms itself, so every later call returns None until
    arm() is called again.
    '''

    def __init__(self):
        super(Unwinder, self).__init__('QEMU coroutine unwinder')
        self._regs = None

    def arm(self, regs):
        '''Store the registers to report on the next call.

        regs maps register names to values; passing None disarms the
        unwinder.
        '''
        self._regs = regs

    def __call__(self, pending_frame):
        '''Return unwind info for the armed registers, or None if unarmed.'''
        if not self._regs:
            return None

        # Consume the registers first so that only a single frame is ever
        # produced per arm() call.
        regs = self._regs
        self._regs = None

        frame_id = FrameId(regs['rbp'], regs['rip'])
        unwind_info = pending_frame.create_unwind_info(frame_id)
        for reg_name, reg_value in regs.items():
            unwind_info.add_saved_register(reg_name, reg_value)

        print('Returning a frame with rip 0x%x' % regs['rip'])
        return unwind_info


# A single shared instance, registered globally (locus None); bt_jmpbuf()
# arms it on demand.
unwinder = Unwinder()
gdb.unwinder.register_unwinder(None, unwinder)


def pthread_self():
    '''Fetch pthread_self() from the glibc start_thread function.

    Walks outward from the newest frame looking for a frame named
    'start_thread' and returns its "arg" variable. Falls back to
    $fs_base when no such frame exists or when reading "arg" raises
    ValueError.
    '''
    f = gdb.newest_frame()
    while f.name() != 'start_thread':
        f = f.older()
        if f is None:
            return gdb.parse_and_eval('$fs_base')

    try:
        return f.read_var("arg")
    except ValueError:
        return gdb.parse_and_eval('$fs_base')


def get_glibc_pointer_guard():
    '''Fetch glibc pointer guard value

    The value is read as a uint64_t located 0x30 bytes past the address
    returned by pthread_self().
    '''
    fs_base = pthread_self()
    return gdb.parse_and_eval(
        '*(uint64_t*)((uint64_t)%s + 0x30)' % fs_base)


def glibc_ptr_demangle(val, pointer_guard):
    '''Undo effect of glibc's PTR_MANGLE()

    Rotates val right by 0x11 bits (as a 64-bit quantity) and XORs the
    result with pointer_guard. The computation is evaluated by GDB.
    '''
    return gdb.parse_and_eval(
        '(((uint64_t)%s >> 0x11) | ((uint64_t)%s << (64 - 0x11))) ^ (uint64_t)%s'
        % (val, val, pointer_guard))


def get_jmpbuf_regs(jmpbuf):
    '''Return a dict of register name -> value extracted from jmpbuf.

    rbp, rsp and rip are passed through glibc_ptr_demangle(); rbx and
    r12-r15 are taken from the buffer unchanged.
    '''
    # Slot indices within the jmpbuf array.
    JB_RBX = 0
    JB_RBP = 1
    JB_R12 = 2
    JB_R13 = 3
    JB_R14 = 4
    JB_R15 = 5
    JB_RSP = 6
    JB_PC = 7

    pointer_guard = get_glibc_pointer_guard()
    return {
        'rbx': jmpbuf[JB_RBX],
        'rbp': glibc_ptr_demangle(jmpbuf[JB_RBP], pointer_guard),
        'rsp': glibc_ptr_demangle(jmpbuf[JB_RSP], pointer_guard),
        'r12': jmpbuf[JB_R12],
        'r13': jmpbuf[JB_R13],
        'r14': jmpbuf[JB_R14],
        'r15': jmpbuf[JB_R15],
        'rip': glibc_ptr_demangle(jmpbuf[JB_PC], pointer_guard),
    }


def bt_jmpbuf(jmpbuf):
    '''Backtrace a jmpbuf

    Arms the coroutine unwinder with the registers saved in jmpbuf and
    runs "info threads" followed by "bt".
    '''
    regs = get_jmpbuf_regs(jmpbuf)

    # GDB keeps a cache of already-unwound frames. Flush it so the armed
    # unwinder is actually consulted, including on repeated calls.
    gdb.invalidate_cached_frames()
    unwinder.arm(regs)
    try:
        gdb.execute('info threads')
        gdb.execute('bt')
    finally:
        # Never leave the unwinder armed (e.g. if a command raised) and
        # drop the synthetic frame so later commands see the real stack.
        unwinder.arm(None)
        gdb.invalidate_cached_frames()


def co_cast(co):
    '''Cast co to a pointer to CoroutineUContext.'''
    return co.cast(gdb.lookup_type('CoroutineUContext').pointer())


def coroutine_to_jmpbuf(co):
    '''Return the env.__jmpbuf member of the coroutine pointed to by co.'''
    coroutine_pointer = co_cast(co)
    return coroutine_pointer['env']['__jmpbuf']


class CoroutineCommand(gdb.Command):
    '''Display coroutine backtrace'''

    def __init__(self):
        gdb.Command.__init__(self, 'qemu coroutine', gdb.COMMAND_DATA,
                             gdb.COMPLETE_NONE)

    def invoke(self, arg, from_tty):
        '''Backtrace the coroutine named by the single argument in arg.'''
        argv = gdb.string_to_argv(arg)
        if len(argv) != 1:
            gdb.write('usage: qemu coroutine <coroutine-pointer>\n')
            return

        bt_jmpbuf(coroutine_to_jmpbuf(gdb.parse_and_eval(argv[0])))


class CoroutineBt(gdb.Command):
    '''Display backtrace including coroutine switches'''

    def __init__(self):
        gdb.Command.__init__(self, 'qemu bt', gdb.COMMAND_STACK,
                             gdb.COMPLETE_NONE)

    def invoke(self, arg, from_tty):
        '''Run "bt", then backtrace each caller in the coroutine chain.'''
        gdb.execute("bt")

        if not gdb.parse_and_eval("qemu_in_coroutine()"):
            return

        co_ptr = gdb.parse_and_eval("qemu_coroutine_self()")

        # Follow base.caller links until a null caller is reached.
        while True:
            co = co_cast(co_ptr)
            co_ptr = co["base"]["caller"]
            if co_ptr == 0:
                break
            gdb.write("Coroutine at " + str(co_ptr) + ":\n")
            bt_jmpbuf(coroutine_to_jmpbuf(co_ptr))


class CoroutineSPFunction(gdb.Function):
    '''Return the saved stack pointer of a coroutine as a void pointer.'''

    def __init__(self):
        gdb.Function.__init__(self, 'qemu_coroutine_sp')

    def invoke(self, addr):
        '''Return the demangled rsp from addr's jmpbuf, cast to void *.'''
        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rsp'].cast(VOID_PTR)


class CoroutinePCFunction(gdb.Function):
    '''Return the saved program counter of a coroutine as a void pointer.'''

    def __init__(self):
        gdb.Function.__init__(self, 'qemu_coroutine_pc')

    def invoke(self, addr):
        '''Return the demangled rip from addr's jmpbuf, cast to void *.'''
        return get_jmpbuf_regs(coroutine_to_jmpbuf(addr))['rip'].cast(VOID_PTR)