public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] libdwfl: Use process_vm_readv when available.
@ 2018-03-18  0:43 Mark Wielaard
  2018-03-20 22:32 ` Mark Wielaard
  2018-03-20 23:28 ` Dmitry V. Levin
  0 siblings, 2 replies; 6+ messages in thread
From: Mark Wielaard @ 2018-03-18  0:43 UTC (permalink / raw)
  To: elfutils-devel; +Cc: Mark Wielaard

If possible use process_vm_readv to read 4K blocks instead of fetching
each word individually with ptrace. For unwinding this often means we
only have to do one process_vm_readv of the stack instead of dozens of
ptrace calls. There is one 4K cache per process, cleared whenever a
thread is detached.

Signed-off-by: Mark Wielaard <mark@klomp.org>
---
 ChangeLog                  |  4 +++
 configure.ac               |  6 ++--
 libdwfl/ChangeLog          | 11 +++++++
 libdwfl/libdwflP.h         | 14 ++++++++-
 libdwfl/linux-pid-attach.c | 74 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 279c3b2b..81542414 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2018-03-17  Mark Wielaard  <mark@klomp.org>
+
+	* configure.ac (CHECK_FUNCS): Check for process_vm_readv.
+
 2018-02-09  Joshua Watt  <JPEWhacker@gmail.com>
 
 	* configure.ac (HAVE_FALLTHROUGH): New define.
diff --git a/configure.ac b/configure.ac
index 4cdb12af..ab32cbc8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 dnl Process this file with autoconf to produce a configure script.
 dnl Configure input file for elfutils.                     -*-autoconf-*-
 dnl
-dnl Copyright (C) 1996-2017 Red Hat, Inc.
+dnl Copyright (C) 1996-2018 Red Hat, Inc.
 dnl
 dnl This file is part of elfutils.
 dnl
@@ -41,7 +41,7 @@ fi
 AC_CONFIG_AUX_DIR([config])
 AC_CONFIG_FILES([config/Makefile])
 
-AC_COPYRIGHT([Copyright (C) 1996-2017 The elfutils developers.])
+AC_COPYRIGHT([Copyright (C) 1996-2018 The elfutils developers.])
 AC_PREREQ(2.63)			dnl Minimum Autoconf version required.
 
 dnl We use GNU make extensions; automake 1.10 defaults to -Wportability.
@@ -370,6 +370,8 @@ AC_CHECK_DECLS([mempcpy],[],[],
                [#define _GNU_SOURCE
                 #include <string.h>])
 
+AC_CHECK_FUNCS([process_vm_readv])
+
 AC_CHECK_LIB([stdc++], [__cxa_demangle], [dnl
 AC_DEFINE([USE_DEMANGLE], [1], [Defined if demangling is enabled])])
 AM_CONDITIONAL(DEMANGLE, test "x$ac_cv_lib_stdcpp___cxa_demangle" = "xyes")
diff --git a/libdwfl/ChangeLog b/libdwfl/ChangeLog
index 1515c410..9776f1cb 100644
--- a/libdwfl/ChangeLog
+++ b/libdwfl/ChangeLog
@@ -1,3 +1,14 @@
+2018-03-17  Mark Wielaard  <mark@klomp.org>
+
+	* libdwflP.h (struct __libdwfl_remote_mem_cache): New.
+	(struct __libdwfl_pid_arg): Add mem_cache field.
+	* linux-pid-attach.c (read_cached_memory): New function.
+	(clear_cached_memory): Likewise.
+	(pid_memory_read): Call read_cached_memory.
+	(pid_detach): Free mem_cache.
+	(pid_thread_detach): Call clear_cached_memory.
+	(dwfl_linux_proc_attach): Initialize mem_cache to NULL.
+
 2018-03-05  Mark Wielaard  <mark@klomp.org>
 
 	* dwfl_build_id_find_elf.c (__libdwfl_open_by_build_id): Use
diff --git a/libdwfl/libdwflP.h b/libdwfl/libdwflP.h
index 7d5f795c..15ca0a11 100644
--- a/libdwfl/libdwflP.h
+++ b/libdwfl/libdwflP.h
@@ -1,5 +1,5 @@
 /* Internal definitions for libdwfl.
-   Copyright (C) 2005-2015 Red Hat, Inc.
+   Copyright (C) 2005-2015, 2018 Red Hat, Inc.
    This file is part of elfutils.
 
    This file is free software; you can redistribute it and/or modify
@@ -401,6 +401,14 @@ struct dwfl_arange
   size_t arange;		/* Index in Dwarf_Aranges.  */
 };
 
+#define __LIBDWFL_REMOTE_MEM_CACHE_SIZE 4096
+/* Structure for caching remote memory reads as used by __libdwfl_pid_arg.  */
+struct __libdwfl_remote_mem_cache
+{
+  Dwarf_Addr addr; /* Remote address.  */
+  Dwarf_Off len;   /* Zero if cleared, otherwise likely 4K. */
+  unsigned char buf[__LIBDWFL_REMOTE_MEM_CACHE_SIZE]; /* The actual cache.  */
+};
 
 /* Structure used for keeping track of ptrace attaching a thread.
    Shared by linux-pid-attach and linux-proc-maps.  If it has been setup
@@ -411,6 +419,10 @@ struct __libdwfl_pid_arg
   DIR *dir;
   /* Elf for /proc/PID/exe.  Set to NULL if it couldn't be opened.  */
   Elf *elf;
+  /* Remote memory cache, NULL if there is no memory cached.
+     Should be cleared on detachment (because that makes the thread
+     runnable and the cache invalid).  */
+  struct __libdwfl_remote_mem_cache *mem_cache;
   /* fd for /proc/PID/exe.  Set to -1 if it couldn't be opened.  */
   int elf_fd;
   /* It is 0 if not used.  */
diff --git a/libdwfl/linux-pid-attach.c b/libdwfl/linux-pid-attach.c
index 2ab4109c..ea65618f 100644
--- a/libdwfl/linux-pid-attach.c
+++ b/libdwfl/linux-pid-attach.c
@@ -1,5 +1,5 @@
 /* Get Dwarf Frame state for target live PID process.
-   Copyright (C) 2013, 2014, 2015 Red Hat, Inc.
+   Copyright (C) 2013, 2014, 2015, 2018 Red Hat, Inc.
    This file is part of elfutils.
 
    This file is free software; you can redistribute it and/or modify
@@ -34,6 +34,7 @@
 #include "libdwflP.h"
 #include <sys/types.h>
 #include <sys/stat.h>
+#include <sys/uio.h>
 #include <fcntl.h>
 #include <dirent.h>
 #include <unistd.h>
@@ -115,12 +116,80 @@ __libdwfl_ptrace_attach (pid_t tid, bool *tid_was_stoppedp)
   return true;
 }
 
+#ifdef HAVE_PROCESS_VM_READV
+static bool
+read_cached_memory (struct __libdwfl_pid_arg *pid_arg,
+		    Dwarf_Addr addr, Dwarf_Word *result)
+{
+  /* Let the ptrace fallback deal with the corner case of the address
+     possibly crossing a page boundery.  */
+  if ((addr & ((Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - 1))
+      > (Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - sizeof (unsigned long))
+    return false;
+
+  struct __libdwfl_remote_mem_cache *mem_cache = pid_arg->mem_cache;
+  if (mem_cache == NULL)
+    {
+      size_t mem_cache_size = sizeof (struct __libdwfl_remote_mem_cache);
+      mem_cache = (struct __libdwfl_remote_mem_cache *) malloc (mem_cache_size);
+      if (mem_cache == NULL)
+	return false;
+
+      mem_cache->addr = 0;
+      mem_cache->len = 0;
+      pid_arg->mem_cache = mem_cache;
+    }
+
+  unsigned char *d;
+  if (addr >= mem_cache->addr && addr - mem_cache->addr < mem_cache->len)
+    {
+      d = &mem_cache->buf[addr - mem_cache->addr];
+      *result = *(unsigned long *) d;
+      return true;
+    }
+
+  struct iovec local, remote;
+  mem_cache->addr = addr & ~((Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - 1);
+  local.iov_base = mem_cache->buf;
+  local.iov_len = __LIBDWFL_REMOTE_MEM_CACHE_SIZE;
+  remote.iov_base = (void *) (uintptr_t) mem_cache->addr;
+  remote.iov_len = __LIBDWFL_REMOTE_MEM_CACHE_SIZE;
+
+  ssize_t res = process_vm_readv (pid_arg->tid_attached,
+				  &local, 1, &remote, 1, 0);
+  if (res != __LIBDWFL_REMOTE_MEM_CACHE_SIZE)
+    {
+      mem_cache->len = 0;
+      return false;
+    }
+
+  mem_cache->len = res;
+  d = &mem_cache->buf[addr - mem_cache->addr];
+  *result = *((unsigned long *) d);
+  return true;
+}
+#endif /* HAVE_PROCESS_VM_READV */
+
+static void
+clear_cached_memory (struct __libdwfl_pid_arg *pid_arg)
+{
+  struct __libdwfl_remote_mem_cache *mem_cache = pid_arg->mem_cache;
+  if (mem_cache != NULL)
+    mem_cache->len = 0;
+}
+
 static bool
 pid_memory_read (Dwfl *dwfl, Dwarf_Addr addr, Dwarf_Word *result, void *arg)
 {
   struct __libdwfl_pid_arg *pid_arg = arg;
   pid_t tid = pid_arg->tid_attached;
   assert (tid > 0);
+
+#ifdef HAVE_PROCESS_VM_READV
+  if (read_cached_memory (pid_arg, addr, result))
+    return true;
+#endif
+
   Dwfl_Process *process = dwfl->process;
   if (ebl_get_elfclass (process->ebl) == ELFCLASS64)
     {
@@ -253,6 +322,7 @@ pid_detach (Dwfl *dwfl __attribute__ ((unused)), void *dwfl_arg)
 {
   struct __libdwfl_pid_arg *pid_arg = dwfl_arg;
   elf_end (pid_arg->elf);
+  free (pid_arg->mem_cache);
   close (pid_arg->elf_fd);
   closedir (pid_arg->dir);
   free (pid_arg);
@@ -278,6 +348,7 @@ pid_thread_detach (Dwfl_Thread *thread, void *thread_arg)
   pid_t tid = INTUSE(dwfl_thread_tid) (thread);
   assert (pid_arg->tid_attached == tid);
   pid_arg->tid_attached = 0;
+  clear_cached_memory (pid_arg);
   if (! pid_arg->assume_ptrace_stopped)
     __libdwfl_ptrace_detach (tid, pid_arg->tid_was_stopped);
 }
@@ -379,6 +450,7 @@ dwfl_linux_proc_attach (Dwfl *dwfl, pid_t pid, bool assume_ptrace_stopped)
   pid_arg->dir = dir;
   pid_arg->elf = elf;
   pid_arg->elf_fd = elf_fd;
+  pid_arg->mem_cache = NULL;
   pid_arg->tid_attached = 0;
   pid_arg->assume_ptrace_stopped = assume_ptrace_stopped;
   if (! INTUSE(dwfl_attach_state) (dwfl, elf, pid, &pid_thread_callbacks,
-- 
2.16.1

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libdwfl: Use process_vm_readv when available.
  2018-03-18  0:43 [PATCH] libdwfl: Use process_vm_readv when available Mark Wielaard
@ 2018-03-20 22:32 ` Mark Wielaard
  2018-03-28 14:33   ` Mark Wielaard
  2018-03-20 23:28 ` Dmitry V. Levin
  1 sibling, 1 reply; 6+ messages in thread
From: Mark Wielaard @ 2018-03-20 22:32 UTC (permalink / raw)
  To: elfutils-devel

On Sun, Mar 18, 2018 at 01:43:23AM +0100, Mark Wielaard wrote:
> If possible use process_vm_readv to read 4K blocks instead of fetching
> each word individually with ptrace. For unwinding this often means we
> only have to do one process_vm_readv of the stack instead of dozens of
> ptrace calls. There is one 4K cache per process, cleared whenever a
> thread is detached.

It seems to work well, but the GCC undefined sanitizer
(configure --enable-sanitize-undefined) found an issue in the
run-backtrace-native-biarch.sh testcase (from x86_64 to i686)
when reading unaligned data. To fix that don't assign to the
Dwarf_Word directly when unaligned, but use memcpy (which gcc
seems to inline).

diff --git a/libdwfl/linux-pid-attach.c b/libdwfl/linux-pid-attach.c
index ea65618f..6295e391 100644
--- a/libdwfl/linux-pid-attach.c
+++ b/libdwfl/linux-pid-attach.c
@@ -144,7 +144,10 @@ read_cached_memory (struct __libdwfl_pid_arg *pid_arg,
   if (addr >= mem_cache->addr && addr - mem_cache->addr < mem_cache->len)
     {
       d = &mem_cache->buf[addr - mem_cache->addr];
-      *result = *(unsigned long *) d;
+      if ((((uintptr_t) d) & (sizeof (unsigned long) - 1)) == 0)
+	*result = *(unsigned long *) d;
+      else
+	memcpy (result, d, sizeof (unsigned long));
       return true;
     }

@@ -165,7 +168,10 @@ read_cached_memory (struct __libdwfl_pid_arg *pid_arg,

   mem_cache->len = res;
   d = &mem_cache->buf[addr - mem_cache->addr];
-  *result = *((unsigned long *) d);
+  if ((((uintptr_t) d) & (sizeof (unsigned long) - 1)) == 0)
+    *result = *(unsigned long *) d;
+  else
+    memcpy (result, d, sizeof (unsigned long));
   return true;
 }
 #endif /* HAVE_PROCESS_VM_READV */

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libdwfl: Use process_vm_readv when available.
  2018-03-18  0:43 [PATCH] libdwfl: Use process_vm_readv when available Mark Wielaard
  2018-03-20 22:32 ` Mark Wielaard
@ 2018-03-20 23:28 ` Dmitry V. Levin
  2018-03-21  0:47   ` Mark Wielaard
  1 sibling, 1 reply; 6+ messages in thread
From: Dmitry V. Levin @ 2018-03-20 23:28 UTC (permalink / raw)
  To: elfutils-devel

[-- Attachment #1: Type: text/plain, Size: 1575 bytes --]

On Sun, Mar 18, 2018 at 01:43:23AM +0100, Mark Wielaard wrote:
[...]
> @@ -115,12 +116,80 @@ __libdwfl_ptrace_attach (pid_t tid, bool *tid_was_stoppedp)
>    return true;
>  }
>  
> +#ifdef HAVE_PROCESS_VM_READV
> +static bool
> +read_cached_memory (struct __libdwfl_pid_arg *pid_arg,
> +		    Dwarf_Addr addr, Dwarf_Word *result)
> +{
> +  /* Let the ptrace fallback deal with the corner case of the address
> +     possibly crossing a page boundery.  */
> +  if ((addr & ((Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - 1))
> +      > (Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - sizeof (unsigned long))

It looks odd that the variable that is going to be assigned has type
Dwarf_Word, while the size being checked has type unsigned long.
Shouldn't it be sizeof(*result) instead?

> +    return false;
> +
> +  struct __libdwfl_remote_mem_cache *mem_cache = pid_arg->mem_cache;
> +  if (mem_cache == NULL)
> +    {
> +      size_t mem_cache_size = sizeof (struct __libdwfl_remote_mem_cache);
> +      mem_cache = (struct __libdwfl_remote_mem_cache *) malloc (mem_cache_size);
> +      if (mem_cache == NULL)
> +	return false;
> +
> +      mem_cache->addr = 0;
> +      mem_cache->len = 0;
> +      pid_arg->mem_cache = mem_cache;
> +    }
> +
> +  unsigned char *d;
> +  if (addr >= mem_cache->addr && addr - mem_cache->addr < mem_cache->len)
> +    {
> +      d = &mem_cache->buf[addr - mem_cache->addr];
> +      *result = *(unsigned long *) d;

Likewise, shouldn't it be memcpy(result, d, sizeof(*result)) instead?


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libdwfl: Use process_vm_readv when available.
  2018-03-20 23:28 ` Dmitry V. Levin
@ 2018-03-21  0:47   ` Mark Wielaard
  0 siblings, 0 replies; 6+ messages in thread
From: Mark Wielaard @ 2018-03-21  0:47 UTC (permalink / raw)
  To: elfutils-devel

On Wed, Mar 21, 2018 at 02:28:48AM +0300, Dmitry V. Levin wrote:
> On Sun, Mar 18, 2018 at 01:43:23AM +0100, Mark Wielaard wrote:
> > +  /* Let the ptrace fallback deal with the corner case of the address
> > +     possibly crossing a page boundery.  */
> > +  if ((addr & ((Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - 1))
> > +      > (Dwarf_Addr)__LIBDWFL_REMOTE_MEM_CACHE_SIZE - sizeof (unsigned long))
> 
> It looks odd that the variable that is going to be assigned has type
> Dwarf_Word, while the size being checked has type unsigned long.
> Shouldn't it be sizeof(*result) instead?
> 
> > +      d = &mem_cache->buf[addr - mem_cache->addr];
> > +      *result = *(unsigned long *) d;
> 
> Likewise, shouldn't it be memcpy(result, d, sizeof(*result)) instead?

That is indeed not immediately clear. I'll add some documentation.
Although the functions do use Dwarf_Word (which is always 64bits)
they actually return the result of an unsigned long/address. This
is true for both the pid based and core based memory read functions.
I am not completely sure if this was originally deliberate, or if
this was the accidental result of the ptrace interface returning a
long (target word) for PTRACE_PEEKDATA.

Thanks for reviewing.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libdwfl: Use process_vm_readv when available.
  2018-03-20 22:32 ` Mark Wielaard
@ 2018-03-28 14:33   ` Mark Wielaard
  2018-03-28 21:42     ` Dmitry V. Levin
  0 siblings, 1 reply; 6+ messages in thread
From: Mark Wielaard @ 2018-03-28 14:33 UTC (permalink / raw)
  To: elfutils-devel

On Tue, 2018-03-20 at 23:32 +0100, Mark Wielaard wrote:
> On Sun, Mar 18, 2018 at 01:43:23AM +0100, Mark Wielaard wrote:
> > If possible use process_vm_readv to read 4K blocks instead of fetching
> > each word individually with ptrace. For unwinding this often means we
> > only have to do one process_vm_readv of the stack instead of dozens of
> > ptrace calls. There is one 4K cache per process, cleared whenever a
> > thread is detached.
> 
> It seems to work well, but the GCC undefined sanitizer
> (configure --enable-sanitize-undefined) found an issue in the
> run-backtrace-native-biarch.sh testcase (from x86_64 to i686)
> when reading unaligned data. To fix that don't assign to the
> Dwarf_Word directly when unaligned, but use memcpy (which gcc
> seems to inline).

I pushed this to master now. Adding some comments about the word size
being actually architecture defined even though we use a 64bit
Dwarf_Word everywhere.

Cheers,

Mark

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] libdwfl: Use process_vm_readv when available.
  2018-03-28 14:33   ` Mark Wielaard
@ 2018-03-28 21:42     ` Dmitry V. Levin
  0 siblings, 0 replies; 6+ messages in thread
From: Dmitry V. Levin @ 2018-03-28 21:42 UTC (permalink / raw)
  To: elfutils-devel

[-- Attachment #1: Type: text/plain, Size: 1076 bytes --]

On Wed, Mar 28, 2018 at 04:33:26PM +0200, Mark Wielaard wrote:
> On Tue, 2018-03-20 at 23:32 +0100, Mark Wielaard wrote:
> > On Sun, Mar 18, 2018 at 01:43:23AM +0100, Mark Wielaard wrote:
> > > If possible use process_vm_readv to read 4K blocks instead of fetching
> > > each word individually with ptrace. For unwinding this often means we
> > > only have to do one process_vm_readv of the stack instead of dozens of
> > > ptrace calls. There is one 4K cache per process, cleared whenever a
> > > thread is detached.
> > 
> > It seems to work well, but the GCC undefined sanitizer
> > (configure --enable-sanitize-undefined) found an issue in the
> > run-backtrace-native-biarch.sh testcase (from x86_64 to i686)
> > when reading unaligned data. To fix that don't assign to the
> > Dwarf_Word directly when unaligned, but use memcpy (which gcc
> > seems to inline).
> 
> I pushed this to master now. Adding some comments about the word size
> being actually architecture defined even though we use a 64bit
> Dwarf_Word everywhere.

Thanks!


-- 
ldv

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 801 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-03-28 21:42 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-03-18  0:43 [PATCH] libdwfl: Use process_vm_readv when available Mark Wielaard
2018-03-20 22:32 ` Mark Wielaard
2018-03-28 14:33   ` Mark Wielaard
2018-03-28 21:42     ` Dmitry V. Levin
2018-03-20 23:28 ` Dmitry V. Levin
2018-03-21  0:47   ` Mark Wielaard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).