public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Read DWARF5 .debug_line.
@ 2020-10-12 19:06 Mark Wielaard
  2020-10-12 19:06 ` [PATCH 2/2] Write DWARF5 multifile .debug_line if possible Mark Wielaard
  2020-10-13 14:19 ` [PATCH 1/2] Read DWARF5 .debug_line Jakub Jelinek
  0 siblings, 2 replies; 5+ messages in thread
From: Mark Wielaard @ 2020-10-12 19:06 UTC (permalink / raw)
  To: dwz; +Cc: Mark Wielaard

This handles reading DWARF5 .debug_line just like earlier DWARF versions.
It sets file time and size to zero when not present. It skips the zero
file entry (which cannot be referred to from any attribute, since the
value zero indicates that no source file has been specified), but does
read the zero dir entry (which can be referred to from the file index).
It does not yet handle MD5 checksums.

ChangeLog:

	* dwz.c (get_DW_LNCT_str): New function.
	(skip_attr_no_dw_form_indirect): Move before read_debug_line.
	(read_debug_line): Handle version 5.
---
 dwz.c | 611 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 487 insertions(+), 124 deletions(-)

diff --git a/dwz.c b/dwz.c
index 9dbf651..9345e6c 100644
--- a/dwz.c
+++ b/dwz.c
@@ -711,6 +711,28 @@ get_DW_UT_str (unsigned int ut)
   return buf;
 }
 
+/* Return the DW_LNCT_* name for LNCT, or "DW_LNCT_<num>" if unknown.  */
+static const char *
+get_DW_LNCT_str (unsigned int lnct)
+{
+  const char *name;
+  static char buf[9 + 3 * sizeof (int)]; /* "DW_LNCT_", digits and NUL.  */
+  switch (lnct)
+    {
+    case DW_LNCT_path: name = "DW_LNCT_path"; break;
+    case DW_LNCT_directory_index: name = "DW_LNCT_directory_index"; break;
+    case DW_LNCT_timestamp: name = "DW_LNCT_timestamp"; break;
+    case DW_LNCT_size: name = "DW_LNCT_size"; break;
+    case DW_LNCT_MD5: name = "DW_LNCT_MD5"; break;
+
+    default: name = 0; break;
+    }
+  if (name)
+    return name;
+  sprintf (buf, "DW_LNCT_%u", lnct); /* Static: later calls overwrite it.  */
+  return buf;
+}
+
 /* This must match the debug_sections array content
    below.  */
 enum debug_section_kind
@@ -1345,6 +1367,86 @@ read_abbrev (DSO *dso, unsigned char *ptr)
   return h;
 }
 
+/* For an attribute with form FORM starting at PTR, in a unit of DWARF
+   version CU_VERSION, return the pointer just past the attribute.  FORM
+   must not be DW_FORM_indirect; it and unknown forms call abort.  */
+static inline unsigned char * FORCE_INLINE
+skip_attr_no_dw_form_indirect (unsigned int cu_version, uint32_t form,
+			       unsigned char *ptr)
+{
+  size_t len = 0; /* Payload length, set by the block forms only.  */
+
+  switch (form)
+    {
+    case DW_FORM_ref_addr:
+      ptr += cu_version == 2 ? ptr_size : 4; /* Address-sized in v2 only.  */
+      break;
+    case DW_FORM_addr:
+      ptr += ptr_size;
+      break;
+    case DW_FORM_flag_present:
+    case DW_FORM_implicit_const:
+      break; /* Nothing to skip at PTR for these forms.  */
+    case DW_FORM_ref1:
+    case DW_FORM_flag:
+    case DW_FORM_data1:
+      ++ptr;
+      break;
+    case DW_FORM_ref2:
+    case DW_FORM_data2:
+      ptr += 2;
+      break;
+    case DW_FORM_ref4:
+    case DW_FORM_data4:
+    case DW_FORM_sec_offset:
+    case DW_FORM_strp:
+    case DW_FORM_line_strp:
+      ptr += 4; /* Section offsets are skipped as 4 bytes here.  */
+      break;
+    case DW_FORM_ref8:
+    case DW_FORM_data8:
+    case DW_FORM_ref_sig8:
+      ptr += 8;
+      break;
+    case DW_FORM_data16:
+      ptr += 16;
+      break;
+    case DW_FORM_sdata:
+    case DW_FORM_ref_udata:
+    case DW_FORM_udata:
+      skip_leb128 (ptr); /* NOTE(review): presumably advances PTR in place.  */
+      break;
+    case DW_FORM_string: /* Skip up to and including the terminating NUL.  */
+      ptr = (unsigned char *) strchr ((char *)ptr, '\0') + 1;
+      break;
+    case DW_FORM_indirect: /* Excluded by this function's contract.  */
+      abort ();
+    case DW_FORM_block1:
+      len = *ptr++; /* One-byte length prefix.  */
+      break;
+    case DW_FORM_block2:
+      len = read_16 (ptr); /* NOTE(review): read_* presumably advance PTR.  */
+      form = DW_FORM_block1; /* Share the payload skip below.  */
+      break;
+    case DW_FORM_block4:
+      len = read_32 (ptr);
+      form = DW_FORM_block1; /* Share the payload skip below.  */
+      break;
+    case DW_FORM_block:
+    case DW_FORM_exprloc:
+      len = read_uleb128 (ptr);
+      form = DW_FORM_block1; /* Share the payload skip below.  */
+      break;
+    default:
+      abort ();
+    }
+
+  if (form == DW_FORM_block1) /* Any block form: skip its LEN-byte payload.  */
+    ptr += len;
+
+  return ptr;
+}
+
 /* Read the directory and file table from .debug_line offset OFF,
    record it in CU.  */
 static int
@@ -1356,7 +1458,16 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
   unsigned char *endcu, *endprol;
   unsigned char opcode_base;
   unsigned int culen;
-  uint32_t value, dirt_cnt, file_cnt;
+  uint32_t value, version, ndirs, nfiles, dirt_cnt, file_cnt;
+  /* DWARF5 has a dynamic table of elements in possibly different
+     forms.  But we are only interested in the known elements (path,
+     dir index, time, size and possibly later md5).  */
+  unsigned char n, nelems = 0;
+  int path_ndx = -1;
+  int dir_ndx = -1;
+  int time_ndx = -1;
+  int size_ndx = -1;
+  uint16_t elems[256];
 
   if (off >= debug_sections[DEBUG_LINE].size - 4)
     {
@@ -1384,12 +1495,37 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
     }
 
   value = read_16 (ptr);
-  if (value < 2 || value > 4)
+  if (value < 2 || value > 5)
     {
       error (0, 0, "%s: DWARF version %d in .debug_line unhandled",
 	     dso->filename, value);
       return 1;
     }
+  version = value;
+
+  if (version >= 5)
+    {
+      int addr_size, seg_size;
+      if (ptr + 2 > endcu)
+	{
+	  error (0, 0, "%s: .debug_line header too short", dso->filename);
+	  return 1;
+	}
+      addr_size = *ptr++;
+      seg_size = *ptr++;
+      if (addr_size != ptr_size)
+	{
+	  error (0, 0, "%s: .debug_line address size differs from CU ptr size",
+		 dso->filename);
+	  return 1;
+	}
+      if (seg_size != 0)
+	{
+	  error (0, 0, "%s: .debug_line non-zero segment selector size",
+		 dso->filename);
+	  return 1;
+	}
+    }
 
   endprol = ptr + 4;
   endprol += read_32 (ptr);
@@ -1400,73 +1536,381 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
       return 1;
     }
 
-  opcode_base = ptr[4 + (value >= 4)];
-  ptr = dir = ptr + 4 + (value >= 4) + opcode_base;
+  opcode_base = ptr[4 + (version >= 4)];
+  ptr = dir = ptr + 4 + (version >= 4) + opcode_base;
 
   /* dir table: */
-  value = 1;
-  while (*ptr != 0)
+  if (version < 5)
+    {
+      value = 1;
+      while (*ptr != 0)
+	{
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	  ++value;
+	}
+      ndirs = value;
+    }
+  else
     {
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
-      ++value;
+      nelems = *ptr++;
+      for (n = 0; n < nelems; n++)
+	{
+	  uint16_t lnct = read_uleb128 (ptr);
+	  uint16_t form = read_uleb128 (ptr);
+	  if (lnct == DW_LNCT_path)
+	    {
+	      if (path_ndx != -1)
+		{
+		  error (0, 0, "%s: .debug_line duplicate dir path elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      path_ndx = n;
+	    }
+	  else
+	    {
+	      error (0, 0, "%s: .debug_line unhandled dir element %s\n",
+		     dso->filename, get_DW_LNCT_str (lnct));
+	      return 1;
+	    }
+
+	  if (form != DW_FORM_string
+	      && form != DW_FORM_strp
+	      && form != DW_FORM_line_strp)
+	    {
+	      error (0, 0, "%s: .debug_line unhandled form %s for dir path\n",
+		     dso->filename, get_DW_FORM_str (form));
+	      return 1;
+	    }
+
+	  elems[n] = form;
+	}
+
+      ndirs = read_uleb128 (ptr);
     }
 
-  dirt = (unsigned char **) alloca (value * sizeof (unsigned char *));
-  dirt[0] = NULL;
-  dirt_cnt = 1;
-  ptr = dir;
-  while (*ptr != 0)
+  dirt = (unsigned char **) alloca (ndirs * sizeof (unsigned char *));
+  if (version < 5)
+    {
+      dirt[0] = NULL;
+      dirt_cnt = 1;
+      ptr = dir;
+      while (*ptr != 0)
+	{
+	  dirt[dirt_cnt++] = ptr;
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	}
+      ptr++;
+    }
+  else
     {
-      dirt[dirt_cnt++] = ptr;
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+      for (dirt_cnt = 0; dirt_cnt < ndirs; dirt_cnt++)
+	{
+	  for (n = 0; n < nelems; n++)
+	    {
+	      uint32_t form = elems[n];
+	      if (n == path_ndx)
+		{
+		  unsigned char *d;
+		  switch (form)
+		    {
+		    case DW_FORM_string:
+		      d = (unsigned char *) ptr;
+		      break;
+		    case DW_FORM_strp:
+		      {
+			unsigned int strp = do_read_32 (ptr);
+			if (strp >= debug_sections[DEBUG_STR].size)
+			  d = NULL;
+			else
+			  d = ((unsigned char *)
+			       debug_sections[DEBUG_STR].data
+			       + strp);
+		      }
+		      break;
+		    case DW_FORM_line_strp:
+		      {
+			unsigned int line_strp = do_read_32 (ptr);
+			if (line_strp >= debug_sections[DEBUG_LINE_STR].size)
+			  d = NULL;
+			else
+			  d = ((unsigned char *)
+			       debug_sections[DEBUG_LINE_STR].data
+			       + line_strp);
+		      }
+		      break;
+		    default:
+		      d = NULL;
+		      break;
+		    }
+
+		  if (d == NULL)
+		    {
+		      error (0, 0, "%s: .debug_line bad dir path\n",
+			     dso->filename);
+		      return 1;
+		    }
+
+		  /* Note we do this even for the zero entry, which is
+		     marked as NULL for pre-DWARF5 line tables.  This
+		     is important for when we merge file entries
+		     together for a multifile because the zero dir
+		     entry could differ.  It should be equivalent
+		     to the CU DIE comp_dir attribute, but we don't
+		     track that all CUs referring to the (same) line
+		     table share an identical DW_AT_comp_dir value.  */
+		  dirt[dirt_cnt] = d;
+		}
+	      ptr = skip_attr_no_dw_form_indirect (cu->cu_version, form, ptr);
+	    }
+	}
     }
-  ptr++;
 
   /* file table: */
   file = ptr;
-  file_cnt = 0;
-  while (*ptr != 0)
+  if (version < 5)
     {
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
-      value = read_uleb128 (ptr);
+      file_cnt = 0;
+      while (*ptr != 0)
+	{
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	  value = read_uleb128 (ptr);
+
+	  if (value >= dirt_cnt)
+	    {
+	      error (0, 0, "%s: Wrong directory table index %u",
+		     dso->filename, value);
+	      return 1;
+	    }
 
-      if (value >= dirt_cnt)
+	  skip_leb128 (ptr);
+	  skip_leb128 (ptr);
+	  file_cnt++;
+	}
+      nfiles = file_cnt;
+    }
+  else
+    {
+      nelems = *ptr++;
+      path_ndx = -1;
+      for (n = 0; n < nelems; n++)
 	{
-	  error (0, 0, "%s: Wrong directory table index %u",
-		 dso->filename, value);
-	  return 1;
+	  uint16_t lnct = read_uleb128 (ptr);
+	  uint16_t form = read_uleb128 (ptr);
+	  switch (lnct)
+	    {
+	    case DW_LNCT_path:
+	      if (path_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file path elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      path_ndx = n;
+
+	      /* Currently we only handle two string forms which always
+		 stay... */
+	      if (form != DW_FORM_string && form != DW_FORM_line_strp)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for file path\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    case DW_LNCT_directory_index:
+	      if (dir_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file dir elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      dir_ndx = n;
+
+	      if (form != DW_FORM_data1
+		  && form != DW_FORM_data2
+		  && form != DW_FORM_udata)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for dir index\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    case DW_LNCT_timestamp:
+	      if (time_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file time elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      time_ndx = n;
+
+	      if (form != DW_FORM_udata
+		  && form != DW_FORM_data4
+		  && form != DW_FORM_data8)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for file time\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    case DW_LNCT_size:
+	      if (size_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file size elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      size_ndx = n;
+
+	      if (form != DW_FORM_udata
+		  && form != DW_FORM_data1
+		  && form != DW_FORM_data2
+		  && form != DW_FORM_data4
+		  && form != DW_FORM_data8)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for file size\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    default:
+	      error (0, 0, "%s: .debug_line unhandled file element %s\n",
+		     dso->filename, get_DW_LNCT_str (lnct));
+	      return 1;
+	    }
+	  elems[n] = form;
 	}
 
-      skip_leb128 (ptr);
-      skip_leb128 (ptr);
-      file_cnt++;
+      nfiles = read_uleb128 (ptr);
+      nfiles--; /* We will skip the first (zero) entry.  */
     }
 
-  cu->cu_nfiles = file_cnt;
-  cu->cu_files = pool_alloc (dw_file, file_cnt * sizeof (struct dw_file));
-  memset (cu->cu_files, 0, file_cnt * sizeof (struct dw_file));
+  cu->cu_nfiles = nfiles;
+  cu->cu_files = pool_alloc (dw_file, nfiles * sizeof (struct dw_file));
+  memset (cu->cu_files, 0, nfiles * sizeof (struct dw_file));
+
+  if (version < 5)
+    ptr = file;
 
-  ptr = file;
-  file_cnt = 0;
-  while (*ptr != 0)
+  for (file_cnt = 0; file_cnt < nfiles; file_cnt++)
     {
-      unsigned char *end;
-      cu->cu_files[file_cnt].file = (char *) ptr;
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
-      end = ptr;
-      value = read_uleb128 (ptr);
+      char *f = NULL;
+      char *end = NULL;
+      uint32_t d = 0;
+      uint64_t time = 0;
+      uint64_t size = 0;
+      if (version < 5)
+	{
+	  f = (char *) ptr;
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	  end = (char *) ptr;
+	  d = read_uleb128 (ptr);
+	  time = read_uleb128 (ptr);
+	  size = read_uleb128 (ptr);
+	}
+      else
+	{
+	  /* Skip zero entry.  */
+	  if (file_cnt == 0)
+	    for (n = 0; n < nelems; n++)
+	      ptr = skip_attr_no_dw_form_indirect (cu->cu_version,
+						   elems[n], ptr);
+
+	  for (n = 0; n < nelems; n++)
+	    {
+	      uint32_t form = elems[n];
+	      if (n == path_ndx)
+		{
+		  switch (form)
+		    {
+		    case DW_FORM_string:
+		      f = (char *) ptr;
+		      end = strchr ((char *)ptr, 0) + 1;;
+		      break;
+		    case DW_FORM_strp:
+		      {
+			unsigned int strp = do_read_32 (ptr);
+			if (strp >= debug_sections[DEBUG_STR].size)
+			  f = NULL;
+			else
+			  {
+			    f = ((char *) debug_sections[DEBUG_STR].data
+				 + strp);
+			    end = f + strlen (f) + 1;
+			  }
+		      }
+		      break;
+		    case DW_FORM_line_strp:
+		      {
+			unsigned int line_strp = do_read_32 (ptr);
+			if (line_strp >= debug_sections[DEBUG_LINE_STR].size)
+			  f = NULL;
+			else
+			  {
+			    f = ((char *) debug_sections[DEBUG_LINE_STR].data
+				 + line_strp);
+			    end = f + strlen (f) + 1;
+			  }
+		      }
+		      break;
+		    default:
+		      f = NULL;
+		      break;
+		    }
+
+		  if (f == NULL)
+		    {
+		      error (0, 0, "%s: .debug_line bad file path\n",
+			     dso->filename);
+		      return 1;
+		    }
+		}
+	      else if (n == dir_ndx)
+		{
+		  switch (form)
+		    {
+		    case DW_FORM_data1:
+		      d = *ptr;
+		      break;
+		    case DW_FORM_data2:
+		      d = do_read_16 (ptr);
+		      break;
+		    case DW_FORM_udata:
+		      {
+			unsigned char *p = ptr;
+			d = read_uleb128 (p);
+		      }
+		      break;
+		    }
+		}
+	      ptr = skip_attr_no_dw_form_indirect (cu->cu_version, form, ptr);
+	    }
+	}
 
-      if (value >= dirt_cnt)
+      cu->cu_files[file_cnt].file = f;
+      if (d >= dirt_cnt)
 	{
 	  error (0, 0, "%s: Wrong directory table index %u",
 		 dso->filename, value);
 	  return 1;
 	}
 
-      cu->cu_files[file_cnt].dir = (char *) dirt[value];
-      cu->cu_files[file_cnt].time = read_uleb128 (ptr);
-      cu->cu_files[file_cnt].size = read_uleb128 (ptr);
-      size_t file_len = (char *) end - cu->cu_files[file_cnt].file;
+      cu->cu_files[file_cnt].dir = (char *) dirt[d];
+      cu->cu_files[file_cnt].time = time;
+      cu->cu_files[file_cnt].size = size;
+      size_t file_len = (char *) end - f;
       size_t strlen_file = file_len - 1;
       bool file_has_slash = false;
       if (cu->cu_files[file_cnt].file[0] != '/'
@@ -1496,7 +1940,6 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
 	   && cu->cu_files[file_cnt].file[0] == '<'
 	   && cu->cu_files[file_cnt].file[strlen_file - 1] == '>'
 	   && strchr (cu->cu_files[file_cnt].file, '/') == NULL);
-      file_cnt++;
     }
 
   return 0;
@@ -1702,86 +2145,6 @@ off_htab_lookup (dw_cu_ref cu, unsigned int die_offset)
   return (dw_die_ref) htab_find_with_hash (off_htab, &die, off_hash (&die));
 }
 
-/* For a die attribute with form FORM starting at PTR, with the die in CU,
-   return the pointer after the attribute, assuming FORM is not
-   dw_form_indirect.  */
-static inline unsigned char * FORCE_INLINE
-skip_attr_no_dw_form_indirect (unsigned int cu_version, uint32_t form,
-			       unsigned char *ptr)
-{
-  size_t len = 0;
-
-  switch (form)
-    {
-    case DW_FORM_ref_addr:
-      ptr += cu_version == 2 ? ptr_size : 4;
-      break;
-    case DW_FORM_addr:
-      ptr += ptr_size;
-      break;
-    case DW_FORM_flag_present:
-    case DW_FORM_implicit_const:
-      break;
-    case DW_FORM_ref1:
-    case DW_FORM_flag:
-    case DW_FORM_data1:
-      ++ptr;
-      break;
-    case DW_FORM_ref2:
-    case DW_FORM_data2:
-      ptr += 2;
-      break;
-    case DW_FORM_ref4:
-    case DW_FORM_data4:
-    case DW_FORM_sec_offset:
-    case DW_FORM_strp:
-    case DW_FORM_line_strp:
-      ptr += 4;
-      break;
-    case DW_FORM_ref8:
-    case DW_FORM_data8:
-    case DW_FORM_ref_sig8:
-      ptr += 8;
-      break;
-    case DW_FORM_data16:
-      ptr += 16;
-      break;
-    case DW_FORM_sdata:
-    case DW_FORM_ref_udata:
-    case DW_FORM_udata:
-      skip_leb128 (ptr);
-      break;
-    case DW_FORM_string:
-      ptr = (unsigned char *) strchr ((char *)ptr, '\0') + 1;
-      break;
-    case DW_FORM_indirect:
-      abort ();
-    case DW_FORM_block1:
-      len = *ptr++;
-      break;
-    case DW_FORM_block2:
-      len = read_16 (ptr);
-      form = DW_FORM_block1;
-      break;
-    case DW_FORM_block4:
-      len = read_32 (ptr);
-      form = DW_FORM_block1;
-      break;
-    case DW_FORM_block:
-    case DW_FORM_exprloc:
-      len = read_uleb128 (ptr);
-      form = DW_FORM_block1;
-      break;
-    default:
-      abort ();
-    }
-
-  if (form == DW_FORM_block1)
-    ptr += len;
-
-  return ptr;
-}
-
 /* For a die attribute ATTR starting at PTR, with the die in CU, return the
    pointer after the attribute.  */
 static inline unsigned char * FORCE_INLINE
-- 
2.18.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-10-13 21:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-12 19:06 [PATCH 1/2] Read DWARF5 .debug_line Mark Wielaard
2020-10-12 19:06 ` [PATCH 2/2] Write DWARF5 multifile .debug_line if possible Mark Wielaard
2020-10-13 14:23   ` Jakub Jelinek
2020-10-13 14:19 ` [PATCH 1/2] Read DWARF5 .debug_line Jakub Jelinek
2020-10-13 21:14   ` Mark Wielaard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).