public inbox for binutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Support symbol+offset lookup in addr2line
@ 2021-12-23 17:55 Andi Kleen
  2021-12-30  7:57 ` Alan Modra
  0 siblings, 1 reply; 4+ messages in thread
From: Andi Kleen @ 2021-12-23 17:55 UTC (permalink / raw)
  To: binutils; +Cc: Andi Kleen

The Linux kernel usually outputs symbol+offset instead of plain code
addresses these days, to avoid leaking ASLR secrets and to handle
dynamically loaded modules.

Converting those with addr2line is somewhat involved: it requires
looking up the symbol first using nm, then manually computing the
offset, and then passing it to addr2line.

This patch implements the necessary steps directly in addr2line,
by looking up the symbol (with demangling if needed) and computing
the offset.

It's possible that a symbol is ambiguous with a hex number. In this
case it uses the symbol lookup if the string contains a +. When it isn't
ambiguous, the + is optional.
---
 binutils/addr2line.c       | 75 +++++++++++++++++++++++++++++++++++---
 binutils/doc/binutils.texi | 18 +++++----
 2 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/binutils/addr2line.c b/binutils/addr2line.c
index 5b02a67567b..4a2582cecfd 100644
--- a/binutils/addr2line.c
+++ b/binutils/addr2line.c
@@ -37,6 +37,7 @@
 #include "demangle.h"
 #include "bucomm.h"
 #include "elf-bfd.h"
+#include "safe-ctype.h"
 
 static bool unwind_inlines;	/* -i, unwind inlined functions. */
 static bool with_addresses;	/* -a, show addresses.  */
@@ -51,6 +52,7 @@ static int demangle_flags = DMGL_PARAMS | DMGL_ANSI;
 static int naddr;		/* Number of addresses to process.  */
 static char **addr;		/* Hex addresses to process.  */
 
+static long symcount;
 static asymbol **syms;		/* Symbol table.  */
 
 static struct option long_options[] =
@@ -116,7 +118,6 @@ static void
 slurp_symtab (bfd *abfd)
 {
   long storage;
-  long symcount;
   bool dynamic = false;
 
   if ((bfd_get_file_flags (abfd) & HAS_SYMS) == 0)
@@ -220,32 +221,94 @@ find_offset_in_section (bfd *abfd, asection *section)
                                                &line, &discriminator);
 }
 
-/* Read hexadecimal addresses from stdin, translate into
+/* Lookup a symbol with offset in symbol table.  */
+
+static bfd_vma
+lookup_symbol (bfd *abfd, char *sym, size_t offset)
+{
+  long i;
+
+  for (i = 0; i < symcount; i++)
+    {
+      if (!strcmp (syms[i]->name, sym))
+	return syms[i]->value + offset + bfd_asymbol_section (syms[i])->vma;
+    }
+  /* Try again, comparing against the demangled names.  */
+  for (i = 0; i < symcount; i++)
+    {
+      char *d = bfd_demangle (abfd, syms[i]->name, demangle_flags);
+      bool match = d && !strcmp (d, sym);
+      free (d);
+
+      if (match)
+	return syms[i]->value + offset + bfd_asymbol_section (syms[i])->vma;
+    }
+  return 0;
+}
+
+/* Split a symbol+offset expression. adr is modified.  */
+
+static bool
+is_symbol (char *adr, char **symp, size_t *offset)
+{
+  char *end;
+
+  while (ISSPACE (*adr))
+    adr++;
+  if (ISDIGIT (*adr) || *adr == 0)
+    return false;
+  /* Could be either symbol or hex number. Check if it has +.  */
+  if (TOUPPER(*adr) >= 'A' && TOUPPER(*adr) <= 'F' && !strchr (adr, '+'))
+    return false;
+
+  *symp = adr;
+  while (*adr && !ISSPACE (*adr) && *adr != '+')
+    adr++;
+  end = adr;
+  while (ISSPACE (*adr))
+    adr++;
+  *offset = 0;
+  if (*adr == '+')
+    {
+      adr++;
+      *offset = strtoul(adr, NULL, 0);
+    }
+  *end = 0;
+  return true;
+}
+
+/* Read hexadecimal addresses or symbol+offset expressions from stdin, translate into
    file_name:line_number and optionally function name.  */
 
 static void
 translate_addresses (bfd *abfd, asection *section)
 {
   int read_stdin = (naddr == 0);
+  char *adr;
+  char addr_hex[100];
+  char *symp;
+  size_t offset;
 
   for (;;)
     {
       if (read_stdin)
 	{
-	  char addr_hex[100];
-
 	  if (fgets (addr_hex, sizeof addr_hex, stdin) == NULL)
 	    break;
-	  pc = bfd_scan_vma (addr_hex, NULL, 16);
+	  adr = addr_hex;
 	}
       else
 	{
 	  if (naddr <= 0)
 	    break;
 	  --naddr;
-	  pc = bfd_scan_vma (*addr++, NULL, 16);
+	  adr = *addr++;
 	}
 
+      if (is_symbol (adr, &symp, &offset))
+        pc = lookup_symbol (abfd, symp, offset);
+      else
+        pc = bfd_scan_vma (adr, NULL, 16);
       if (bfd_get_flavour (abfd) == bfd_target_elf_flavour)
 	{
 	  const struct elf_backend_data *bed = get_elf_backend_data (abfd);
diff --git a/binutils/doc/binutils.texi b/binutils/doc/binutils.texi
index 9e98f878d25..5bbfaf63d57 100644
--- a/binutils/doc/binutils.texi
+++ b/binutils/doc/binutils.texi
@@ -118,7 +118,7 @@ Demangle encoded C++ symbols (on MS-DOS, this program is named
 @code{cxxfilt})
 
 @item addr2line
-Convert addresses into file names and line numbers
+Convert addresses or symbol+offset into file names and line numbers
 
 @item windres
 Manipulate Windows resources
@@ -146,7 +146,7 @@ in the section entitled ``GNU Free Documentation License''.
 * strip::                       Discard symbols
 * c++filt::			Filter to demangle encoded C++ symbols
 * cxxfilt: c++filt.             MS-DOS name for c++filt
-* addr2line::			Convert addresses to file and line
+* addr2line::			Convert addresses or symbol+offset to file and line
 * windmc::			Generator for Windows message resources
 * windres::			Manipulate Windows resources
 * dlltool::			Create files needed to build and use DLLs
@@ -3897,7 +3897,7 @@ c++filt @var{option} @var{symbol}
 @kindex addr2line
 @cindex address to file name and line number
 
-@c man title addr2line convert addresses into file names and line numbers
+@c man title addr2line convert addresses or symbol+offset into file names and line numbers
 
 @smallexample
 @c man begin SYNOPSIS addr2line
@@ -3918,8 +3918,8 @@ addr2line [@option{-a}|@option{--addresses}]
 
 @c man begin DESCRIPTION addr2line
 
-@command{addr2line} translates addresses into file names and line numbers.
-Given an address in an executable or an offset in a section of a relocatable
+@command{addr2line} translates addresses or symbol+offset into file names and line numbers.
+Given an address or symbol+offset in an executable or an offset in a section of a relocatable
 object, it uses the debugging information to figure out which file name and
 line number are associated with it.
 
@@ -3929,11 +3929,11 @@ object to use is specified with the @option{-j} option.
 
 @command{addr2line} has two modes of operation.
 
-In the first, hexadecimal addresses are specified on the command line,
+In the first, hexadecimal addresses or symbol+offset are specified on the command line,
 and @command{addr2line} displays the file name and line number for each
 address.
 
-In the second, @command{addr2line} reads hexadecimal addresses from
+In the second, @command{addr2line} reads hexadecimal addresses or symbol+offset from
 standard input, and prints the file name and line number for each
 address on standard output.  In this mode, @command{addr2line} may be used
 in a pipe to convert dynamically chosen addresses.
@@ -3970,6 +3970,10 @@ If the file name or function name can not be determined,
 @command{addr2line} will print two question marks in their place.  If the
 line number can not be determined, @command{addr2line} will print 0.
 
+When symbol+offset is used, +offset is optional, except when the symbol
+is ambiguous with a hex number. The resolved symbols can be mangled
+or unmangled, except unmangled symbols with + are not allowed.
+
 @c man end
 
 @c man begin OPTIONS addr2line
-- 
2.34.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Support symbol+offset lookup in addr2line
  2021-12-23 17:55 [PATCH] Support symbol+offset lookup in addr2line Andi Kleen
@ 2021-12-30  7:57 ` Alan Modra
  2022-02-04  2:34   ` Andi Kleen
  0 siblings, 1 reply; 4+ messages in thread
From: Alan Modra @ 2021-12-30  7:57 UTC (permalink / raw)
  To: Andi Kleen; +Cc: binutils

On Thu, Dec 23, 2021 at 09:55:07AM -0800, Andi Kleen wrote:
> This patch implements the necessary steps directly in addr2line,
> by looking up the symbol (with demangling if needed) and computing
> the offset.

Looks OK to me.

-- 
Alan Modra
Australia Development Lab, IBM

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Support symbol+offset lookup in addr2line
  2021-12-30  7:57 ` Alan Modra
@ 2022-02-04  2:34   ` Andi Kleen
  2022-02-04  2:44     ` H.J. Lu
  0 siblings, 1 reply; 4+ messages in thread
From: Andi Kleen @ 2022-02-04  2:34 UTC (permalink / raw)
  To: Alan Modra; +Cc: Andi Kleen, binutils

On Thu, Dec 30, 2021 at 06:27:49PM +1030, Alan Modra wrote:
> On Thu, Dec 23, 2021 at 09:55:07AM -0800, Andi Kleen wrote:
> > This patch implements the necessary steps directly in addr2line,
> > by looking up the symbol (with demangling if needed) and computing
> > the offset.
> 
> Looks OK to me.

If there are no further objections, could someone please commit
the patch? Thanks, 
-Andi

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Support symbol+offset lookup in addr2line
  2022-02-04  2:34   ` Andi Kleen
@ 2022-02-04  2:44     ` H.J. Lu
  0 siblings, 0 replies; 4+ messages in thread
From: H.J. Lu @ 2022-02-04  2:44 UTC (permalink / raw)
  To: Andi Kleen; +Cc: Alan Modra, Binutils

On Thu, Feb 3, 2022 at 6:34 PM Andi Kleen <andi@firstfloor.org> wrote:
>
> On Thu, Dec 30, 2021 at 06:27:49PM +1030, Alan Modra wrote:
> > On Thu, Dec 23, 2021 at 09:55:07AM -0800, Andi Kleen wrote:
> > > This patch implements the necessary steps directly in addr2line,
> > > by looking up the symbol (with demangling if needed) and computing
> > > the offset.
> >
> > Looks OK to me.
>
> If there are no further objections, could someone please commit
> the patch? Thanks,
> -Andi

I pushed it for you.

-- 
H.J.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-02-04  2:45 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-12-23 17:55 [PATCH] Support symbol+offset lookup in addr2line Andi Kleen
2021-12-30  7:57 ` Alan Modra
2022-02-04  2:34   ` Andi Kleen
2022-02-04  2:44     ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).