public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [PATCH 1/2] Read DWARF5 .debug_line.
@ 2020-10-12 19:06 Mark Wielaard
  2020-10-12 19:06 ` [PATCH 2/2] Write DWARF5 multifile .debug_line if possible Mark Wielaard
  2020-10-13 14:19 ` [PATCH 1/2] Read DWARF5 .debug_line Jakub Jelinek
  0 siblings, 2 replies; 5+ messages in thread
From: Mark Wielaard @ 2020-10-12 19:06 UTC (permalink / raw)
  To: dwz; +Cc: Mark Wielaard

This handles reading DWARF5 .debug_line just like earlier DWARF versions.
Sets file time and size to zero when not present. It skips the zero
file entry (which cannot be referred to from any attribute, since the
value zero indicates that no source file has been specified), but does
read the zero dir entry (which can be referred to from the file index).
It does not yet handle MD5 checksums.

ChangeLog:

	* dwz.c (get_DW_LNCT_str): New function.
	(skip_attr_no_dw_form_indirect): Move before read_debug_line.
	(read_debug_line): Handle version 5.
---
 dwz.c | 611 ++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 487 insertions(+), 124 deletions(-)

diff --git a/dwz.c b/dwz.c
index 9dbf651..9345e6c 100644
--- a/dwz.c
+++ b/dwz.c
@@ -711,6 +711,28 @@ get_DW_UT_str (unsigned int ut)
   return buf;
 }
 
+/* Retrun a DW_LNCT_* name.  */
+static const char *
+get_DW_LNCT_str (unsigned int lnct)
+{
+  const char *name;
+  static char buf[9 + 3 * sizeof (int)];
+  switch (lnct)
+    {
+    case DW_LNCT_path: name = "DW_LNCT_path"; break;
+    case DW_LNCT_directory_index: name = "DW_LNCT_directory_index"; break;
+    case DW_LNCT_timestamp: name = "DW_LNCT_timestamp"; break;
+    case DW_LNCT_size: name = "DW_LNCT_size"; break;
+    case DW_LNCT_MD5: name = "DW_LNCT_MD5"; break;
+
+    default: name = 0; break;
+    }
+  if (name)
+    return name;
+  sprintf (buf, "DW_LNCT_%u", lnct);
+  return buf;
+}
+
 /* This must match the debug_sections array content
    below.  */
 enum debug_section_kind
@@ -1345,6 +1367,86 @@ read_abbrev (DSO *dso, unsigned char *ptr)
   return h;
 }
 
+/* For a die attribute with form FORM starting at PTR, with the die in CU,
+   return the pointer after the attribute, assuming FORM is not
+   dw_form_indirect.  */
+static inline unsigned char * FORCE_INLINE
+skip_attr_no_dw_form_indirect (unsigned int cu_version, uint32_t form,
+			       unsigned char *ptr)
+{
+  size_t len = 0;
+
+  switch (form)
+    {
+    case DW_FORM_ref_addr:
+      ptr += cu_version == 2 ? ptr_size : 4;
+      break;
+    case DW_FORM_addr:
+      ptr += ptr_size;
+      break;
+    case DW_FORM_flag_present:
+    case DW_FORM_implicit_const:
+      break;
+    case DW_FORM_ref1:
+    case DW_FORM_flag:
+    case DW_FORM_data1:
+      ++ptr;
+      break;
+    case DW_FORM_ref2:
+    case DW_FORM_data2:
+      ptr += 2;
+      break;
+    case DW_FORM_ref4:
+    case DW_FORM_data4:
+    case DW_FORM_sec_offset:
+    case DW_FORM_strp:
+    case DW_FORM_line_strp:
+      ptr += 4;
+      break;
+    case DW_FORM_ref8:
+    case DW_FORM_data8:
+    case DW_FORM_ref_sig8:
+      ptr += 8;
+      break;
+    case DW_FORM_data16:
+      ptr += 16;
+      break;
+    case DW_FORM_sdata:
+    case DW_FORM_ref_udata:
+    case DW_FORM_udata:
+      skip_leb128 (ptr);
+      break;
+    case DW_FORM_string:
+      ptr = (unsigned char *) strchr ((char *)ptr, '\0') + 1;
+      break;
+    case DW_FORM_indirect:
+      abort ();
+    case DW_FORM_block1:
+      len = *ptr++;
+      break;
+    case DW_FORM_block2:
+      len = read_16 (ptr);
+      form = DW_FORM_block1;
+      break;
+    case DW_FORM_block4:
+      len = read_32 (ptr);
+      form = DW_FORM_block1;
+      break;
+    case DW_FORM_block:
+    case DW_FORM_exprloc:
+      len = read_uleb128 (ptr);
+      form = DW_FORM_block1;
+      break;
+    default:
+      abort ();
+    }
+
+  if (form == DW_FORM_block1)
+    ptr += len;
+
+  return ptr;
+}
+
 /* Read the directory and file table from .debug_line offset OFF,
    record it in CU.  */
 static int
@@ -1356,7 +1458,16 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
   unsigned char *endcu, *endprol;
   unsigned char opcode_base;
   unsigned int culen;
-  uint32_t value, dirt_cnt, file_cnt;
+  uint32_t value, version, ndirs, nfiles, dirt_cnt, file_cnt;
+  /* DWARF5 has a dynamic table of elements in possibly different
+     forms.  But we are only interested in the known elements (path,
+     dir index, time, size and possibly later md5).  */
+  unsigned char n, nelems = 0;
+  int path_ndx = -1;
+  int dir_ndx = -1;
+  int time_ndx = -1;
+  int size_ndx = -1;
+  uint16_t elems[256];
 
   if (off >= debug_sections[DEBUG_LINE].size - 4)
     {
@@ -1384,12 +1495,37 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
     }
 
   value = read_16 (ptr);
-  if (value < 2 || value > 4)
+  if (value < 2 || value > 5)
     {
       error (0, 0, "%s: DWARF version %d in .debug_line unhandled",
 	     dso->filename, value);
       return 1;
     }
+  version = value;
+
+  if (version >= 5)
+    {
+      int addr_size, seg_size;
+      if (ptr + 2 > endcu)
+	{
+	  error (0, 0, "%s: .debug_line header too short", dso->filename);
+	  return 1;
+	}
+      addr_size = *ptr++;
+      seg_size = *ptr++;
+      if (addr_size != ptr_size)
+	{
+	  error (0, 0, "%s: .debug_line address size differs from CU ptr size",
+		 dso->filename);
+	  return 1;
+	}
+      if (seg_size != 0)
+	{
+	  error (0, 0, "%s: .debug_line non-zero segment selector size",
+		 dso->filename);
+	  return 1;
+	}
+    }
 
   endprol = ptr + 4;
   endprol += read_32 (ptr);
@@ -1400,73 +1536,381 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
       return 1;
     }
 
-  opcode_base = ptr[4 + (value >= 4)];
-  ptr = dir = ptr + 4 + (value >= 4) + opcode_base;
+  opcode_base = ptr[4 + (version >= 4)];
+  ptr = dir = ptr + 4 + (version >= 4) + opcode_base;
 
   /* dir table: */
-  value = 1;
-  while (*ptr != 0)
+  if (version < 5)
+    {
+      value = 1;
+      while (*ptr != 0)
+	{
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	  ++value;
+	}
+      ndirs = value;
+    }
+  else
     {
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
-      ++value;
+      nelems = *ptr++;
+      for (n = 0; n < nelems; n++)
+	{
+	  uint16_t lnct = read_uleb128 (ptr);
+	  uint16_t form = read_uleb128 (ptr);
+	  if (lnct == DW_LNCT_path)
+	    {
+	      if (path_ndx != -1)
+		{
+		  error (0, 0, "%s: .debug_line duplicate dir path elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      path_ndx = n;
+	    }
+	  else
+	    {
+	      error (0, 0, "%s: .debug_line unhandled dir element %s\n",
+		     dso->filename, get_DW_LNCT_str (lnct));
+	      return 1;
+	    }
+
+	  if (form != DW_FORM_string
+	      && form != DW_FORM_strp
+	      && form != DW_FORM_line_strp)
+	    {
+	      error (0, 0, "%s: .debug_line unhandled form %s for dir path\n",
+		     dso->filename, get_DW_FORM_str (form));
+	      return 1;
+	    }
+
+	  elems[n] = form;
+	}
+
+      ndirs = read_uleb128 (ptr);
     }
 
-  dirt = (unsigned char **) alloca (value * sizeof (unsigned char *));
-  dirt[0] = NULL;
-  dirt_cnt = 1;
-  ptr = dir;
-  while (*ptr != 0)
+  dirt = (unsigned char **) alloca (ndirs * sizeof (unsigned char *));
+  if (version < 5)
+    {
+      dirt[0] = NULL;
+      dirt_cnt = 1;
+      ptr = dir;
+      while (*ptr != 0)
+	{
+	  dirt[dirt_cnt++] = ptr;
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	}
+      ptr++;
+    }
+  else
     {
-      dirt[dirt_cnt++] = ptr;
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+      for (dirt_cnt = 0; dirt_cnt < ndirs; dirt_cnt++)
+	{
+	  for (n = 0; n < nelems; n++)
+	    {
+	      uint32_t form = elems[n];
+	      if (n == path_ndx)
+		{
+		  unsigned char *d;
+		  switch (form)
+		    {
+		    case DW_FORM_string:
+		      d = (unsigned char *) ptr;
+		      break;
+		    case DW_FORM_strp:
+		      {
+			unsigned int strp = do_read_32 (ptr);
+			if (strp >= debug_sections[DEBUG_STR].size)
+			  d = NULL;
+			else
+			  d = ((unsigned char *)
+			       debug_sections[DEBUG_STR].data
+			       + strp);
+		      }
+		      break;
+		    case DW_FORM_line_strp:
+		      {
+			unsigned int line_strp = do_read_32 (ptr);
+			if (line_strp >= debug_sections[DEBUG_LINE_STR].size)
+			  d = NULL;
+			else
+			  d = ((unsigned char *)
+			       debug_sections[DEBUG_LINE_STR].data
+			       + line_strp);
+		      }
+		      break;
+		    default:
+		      d = NULL;
+		      break;
+		    }
+
+		  if (d == NULL)
+		    {
+		      error (0, 0, "%s: .debug_line bad dir path\n",
+			     dso->filename);
+		      return 1;
+		    }
+
+		  /* Note we do this even for the zero entry, which is
+		     marked as NULL for pre-DWARF5 line tables.  This
+		     is important for when we merge file entries
+		     together for a multifile because the zero dir
+		     entry could differ.  It should be equivalent
+		     to the CU DIE comp_dir attribute, but we don't
+		     track that all CUs referring to the (same) line
+		     table share an identical DW_AT_comp_dir value.  */
+		  dirt[dirt_cnt] = d;
+		}
+	      ptr = skip_attr_no_dw_form_indirect (cu->cu_version, form, ptr);
+	    }
+	}
     }
-  ptr++;
 
   /* file table: */
   file = ptr;
-  file_cnt = 0;
-  while (*ptr != 0)
+  if (version < 5)
     {
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
-      value = read_uleb128 (ptr);
+      file_cnt = 0;
+      while (*ptr != 0)
+	{
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	  value = read_uleb128 (ptr);
+
+	  if (value >= dirt_cnt)
+	    {
+	      error (0, 0, "%s: Wrong directory table index %u",
+		     dso->filename, value);
+	      return 1;
+	    }
 
-      if (value >= dirt_cnt)
+	  skip_leb128 (ptr);
+	  skip_leb128 (ptr);
+	  file_cnt++;
+	}
+      nfiles = file_cnt;
+    }
+  else
+    {
+      nelems = *ptr++;
+      path_ndx = -1;
+      for (n = 0; n < nelems; n++)
 	{
-	  error (0, 0, "%s: Wrong directory table index %u",
-		 dso->filename, value);
-	  return 1;
+	  uint16_t lnct = read_uleb128 (ptr);
+	  uint16_t form = read_uleb128 (ptr);
+	  switch (lnct)
+	    {
+	    case DW_LNCT_path:
+	      if (path_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file path elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      path_ndx = n;
+
+	      /* Currently we only handle two string form which always
+		 stay... */
+	      if (form != DW_FORM_string && form != DW_FORM_line_strp)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for file path\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    case DW_LNCT_directory_index:
+	      if (dir_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file dir elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      dir_ndx = n;
+
+	      if (form != DW_FORM_data1
+		  && form != DW_FORM_data2
+		  && form != DW_FORM_udata)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for dir index\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    case DW_LNCT_timestamp:
+	      if (time_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file time elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      time_ndx = n;
+
+	      if (form != DW_FORM_udata
+		  && form != DW_FORM_data4
+		  && form != DW_FORM_data8)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for file time\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    case DW_LNCT_size:
+	      if (size_ndx != -1)
+		{
+		  error (0, 0,
+			 "%s: .debug_line duplicate file size elements\n",
+			 dso->filename);
+		  return 1;
+		}
+	      size_ndx = n;
+
+	      if (form != DW_FORM_udata
+		  && form != DW_FORM_data1
+		  && form != DW_FORM_data2
+		  && form != DW_FORM_data4
+		  && form != DW_FORM_data8)
+		{
+		  error (0, 0,
+			 "%s: .debug_line unhandled form %s for file size\n",
+			 dso->filename, get_DW_FORM_str (form));
+		  return 1;
+		}
+	      break;
+
+	    default:
+	      error (0, 0, "%s: .debug_line unhandled file element %s\n",
+		     dso->filename, get_DW_LNCT_str (lnct));
+	      return 1;
+	    }
+	  elems[n] = form;
 	}
 
-      skip_leb128 (ptr);
-      skip_leb128 (ptr);
-      file_cnt++;
+      nfiles = read_uleb128 (ptr);
+      nfiles--; /* We will skip the first (zero) entry.  */
     }
 
-  cu->cu_nfiles = file_cnt;
-  cu->cu_files = pool_alloc (dw_file, file_cnt * sizeof (struct dw_file));
-  memset (cu->cu_files, 0, file_cnt * sizeof (struct dw_file));
+  cu->cu_nfiles = nfiles;
+  cu->cu_files = pool_alloc (dw_file, nfiles * sizeof (struct dw_file));
+  memset (cu->cu_files, 0, nfiles * sizeof (struct dw_file));
+
+  if (version < 5)
+    ptr = file;
 
-  ptr = file;
-  file_cnt = 0;
-  while (*ptr != 0)
+  for (file_cnt = 0; file_cnt < nfiles; file_cnt++)
     {
-      unsigned char *end;
-      cu->cu_files[file_cnt].file = (char *) ptr;
-      ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
-      end = ptr;
-      value = read_uleb128 (ptr);
+      char *f = NULL;
+      char *end = NULL;
+      uint32_t d = 0;
+      uint64_t time = 0;
+      uint64_t size = 0;
+      if (version < 5)
+	{
+	  f = (char *) ptr;
+	  ptr = (unsigned char *) strchr ((char *)ptr, 0) + 1;
+	  end = (char *) ptr;
+	  d = read_uleb128 (ptr);
+	  time = read_uleb128 (ptr);
+	  size = read_uleb128 (ptr);
+	}
+      else
+	{
+	  /* Skip zero entry.  */
+	  if (file_cnt == 0)
+	    for (n = 0; n < nelems; n++)
+	      ptr = skip_attr_no_dw_form_indirect (cu->cu_version,
+						   elems[n], ptr);
+
+	  for (n = 0; n < nelems; n++)
+	    {
+	      uint32_t form = elems[n];
+	      if (n == path_ndx)
+		{
+		  switch (form)
+		    {
+		    case DW_FORM_string:
+		      f = (char *) ptr;
+		      end = strchr ((char *)ptr, 0) + 1;;
+		      break;
+		    case DW_FORM_strp:
+		      {
+			unsigned int strp = do_read_32 (ptr);
+			if (strp >= debug_sections[DEBUG_STR].size)
+			  f = NULL;
+			else
+			  {
+			    f = ((char *) debug_sections[DEBUG_STR].data
+				 + strp);
+			    end = f + strlen (f) + 1;
+			  }
+		      }
+		      break;
+		    case DW_FORM_line_strp:
+		      {
+			unsigned int line_strp = do_read_32 (ptr);
+			if (line_strp >= debug_sections[DEBUG_LINE_STR].size)
+			  f = NULL;
+			else
+			  {
+			    f = ((char *) debug_sections[DEBUG_LINE_STR].data
+				 + line_strp);
+			    end = f + strlen (f) + 1;
+			  }
+		      }
+		      break;
+		    default:
+		      f = NULL;
+		      break;
+		    }
+
+		  if (f == NULL)
+		    {
+		      error (0, 0, "%s: .debug_line bad file path\n",
+			     dso->filename);
+		      return 1;
+		    }
+		}
+	      else if (n == dir_ndx)
+		{
+		  switch (form)
+		    {
+		    case DW_FORM_data1:
+		      d = *ptr;
+		      break;
+		    case DW_FORM_data2:
+		      d = do_read_16 (ptr);
+		      break;
+		    case DW_FORM_udata:
+		      {
+			unsigned char *p = ptr;
+			d = read_uleb128 (p);
+		      }
+		      break;
+		    }
+		}
+	      ptr = skip_attr_no_dw_form_indirect (cu->cu_version, form, ptr);
+	    }
+	}
 
-      if (value >= dirt_cnt)
+      cu->cu_files[file_cnt].file = f;
+      if (d >= dirt_cnt)
 	{
 	  error (0, 0, "%s: Wrong directory table index %u",
 		 dso->filename, value);
 	  return 1;
 	}
 
-      cu->cu_files[file_cnt].dir = (char *) dirt[value];
-      cu->cu_files[file_cnt].time = read_uleb128 (ptr);
-      cu->cu_files[file_cnt].size = read_uleb128 (ptr);
-      size_t file_len = (char *) end - cu->cu_files[file_cnt].file;
+      cu->cu_files[file_cnt].dir = (char *) dirt[d];
+      cu->cu_files[file_cnt].time = time;
+      cu->cu_files[file_cnt].size = size;
+      size_t file_len = (char *) end - f;
       size_t strlen_file = file_len - 1;
       bool file_has_slash = false;
       if (cu->cu_files[file_cnt].file[0] != '/'
@@ -1496,7 +1940,6 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
 	   && cu->cu_files[file_cnt].file[0] == '<'
 	   && cu->cu_files[file_cnt].file[strlen_file - 1] == '>'
 	   && strchr (cu->cu_files[file_cnt].file, '/') == NULL);
-      file_cnt++;
     }
 
   return 0;
@@ -1702,86 +2145,6 @@ off_htab_lookup (dw_cu_ref cu, unsigned int die_offset)
   return (dw_die_ref) htab_find_with_hash (off_htab, &die, off_hash (&die));
 }
 
-/* For a die attribute with form FORM starting at PTR, with the die in CU,
-   return the pointer after the attribute, assuming FORM is not
-   dw_form_indirect.  */
-static inline unsigned char * FORCE_INLINE
-skip_attr_no_dw_form_indirect (unsigned int cu_version, uint32_t form,
-			       unsigned char *ptr)
-{
-  size_t len = 0;
-
-  switch (form)
-    {
-    case DW_FORM_ref_addr:
-      ptr += cu_version == 2 ? ptr_size : 4;
-      break;
-    case DW_FORM_addr:
-      ptr += ptr_size;
-      break;
-    case DW_FORM_flag_present:
-    case DW_FORM_implicit_const:
-      break;
-    case DW_FORM_ref1:
-    case DW_FORM_flag:
-    case DW_FORM_data1:
-      ++ptr;
-      break;
-    case DW_FORM_ref2:
-    case DW_FORM_data2:
-      ptr += 2;
-      break;
-    case DW_FORM_ref4:
-    case DW_FORM_data4:
-    case DW_FORM_sec_offset:
-    case DW_FORM_strp:
-    case DW_FORM_line_strp:
-      ptr += 4;
-      break;
-    case DW_FORM_ref8:
-    case DW_FORM_data8:
-    case DW_FORM_ref_sig8:
-      ptr += 8;
-      break;
-    case DW_FORM_data16:
-      ptr += 16;
-      break;
-    case DW_FORM_sdata:
-    case DW_FORM_ref_udata:
-    case DW_FORM_udata:
-      skip_leb128 (ptr);
-      break;
-    case DW_FORM_string:
-      ptr = (unsigned char *) strchr ((char *)ptr, '\0') + 1;
-      break;
-    case DW_FORM_indirect:
-      abort ();
-    case DW_FORM_block1:
-      len = *ptr++;
-      break;
-    case DW_FORM_block2:
-      len = read_16 (ptr);
-      form = DW_FORM_block1;
-      break;
-    case DW_FORM_block4:
-      len = read_32 (ptr);
-      form = DW_FORM_block1;
-      break;
-    case DW_FORM_block:
-    case DW_FORM_exprloc:
-      len = read_uleb128 (ptr);
-      form = DW_FORM_block1;
-      break;
-    default:
-      abort ();
-    }
-
-  if (form == DW_FORM_block1)
-    ptr += len;
-
-  return ptr;
-}
-
 /* For a die attribute ATTR starting at PTR, with the die in CU, return the
    pointer after the attribute.  */
 static inline unsigned char * FORCE_INLINE
-- 
2.18.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 2/2] Write DWARF5 multifile .debug_line if possible.
  2020-10-12 19:06 [PATCH 1/2] Read DWARF5 .debug_line Mark Wielaard
@ 2020-10-12 19:06 ` Mark Wielaard
  2020-10-13 14:23   ` Jakub Jelinek
  2020-10-13 14:19 ` [PATCH 1/2] Read DWARF5 .debug_line Jakub Jelinek
  1 sibling, 1 reply; 5+ messages in thread
From: Mark Wielaard @ 2020-10-12 19:06 UTC (permalink / raw)
  To: dwz; +Cc: Mark Wielaard

This writes version 5 .debug_line into the multifile if all .debug_line
segments seen before are also version 5. This is more efficient than
version 2, if we don't have to write out the time and size of the files.
But only if there are more than ~25 files because the header is bigger
and there is some overhead for having to handle zero entry dir and file
entries (those do have to exist, but are not directly referenced by
anything).

ChangeLog:

	* dwz.c (lowest_line_version): New static unsigned int, set
	to 5 initially.
	(read_debug_line): Check version and set lowest_line_version.
	(struct line_stats): New.
	(list_line_entries): Update data as struct line_stats.
	(write_multifile_line): Collect time and size stats and write
	out version 5 .debug_line data if lowest_line_version is 5.
---
 dwz.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 129 insertions(+), 16 deletions(-)

diff --git a/dwz.c b/dwz.c
index 9345e6c..e517a47 100644
--- a/dwz.c
+++ b/dwz.c
@@ -484,6 +484,12 @@ typedef struct
    with a single pointer size are handled.  */
 static int ptr_size;
 
+/* Lowest debug_line version we have seen.  When writing out the multi
+   file .debug_line we'll only use a DWARF5 version when there is no
+   lower line table seen (since the debug_line dir and file table is
+   shared between all CUs).  */
+static unsigned int lowest_line_version = 5;
+
 /* Utility functions and macros for reading/writing values in
    given ELF endianity, which might be different from host endianity.
    No specific alignment is expected.  */
@@ -1503,6 +1509,9 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
     }
   version = value;
 
+  if (version < lowest_line_version)
+    lowest_line_version = version;
+
   if (version >= 5)
     {
       int addr_size, seg_size;
@@ -14371,18 +14380,34 @@ write_multifile_strp (void)
   return ret;
 }
 
-/* Helper to record all strp_entry entries from strp_htab.
+/* Hold some statistics on the line entries so we know whether to emit
+   time and/or sizes.  Used by list_line_entries used by
+   write_multifile_line.  */
+struct line_stats
+{
+  struct line_entry ***end;
+  bool has_time;
+  bool has_size;
+};
+
+/* Helper to find the end of the line_htab entries and other stats.
    Called through htab_traverse.  */
 static int
 list_line_entries (void **slot, void *data)
 {
-  struct line_entry ***end = (struct line_entry ***) data;
-  **end = (struct line_entry *) *slot;
+  struct line_stats *stats = (struct line_stats *) data;
+  struct line_entry *entry = (struct line_entry *) *slot;
+  struct line_entry ***end = stats->end;
+  **end = entry;
   (*end)++;
+  if (entry->file->time != 0)
+    stats->has_time = true;
+  if (entry->file->size != 0)
+    stats->has_size = true;
   return 1;
 }
 
-/* Helper function for write_multifile_strp to sort strp_entry
+/* Helper function for write_multifile_strp to sort line_entry
    by increasing new_id.  */
 static int
 line_id_cmp (const void *p, const void *q)
@@ -14405,13 +14430,15 @@ static int
 write_multifile_line (void)
 {
   unsigned int filecnt = 0, dircnt = 0, filetbllen = 0, dirtbllen = 0;
-  unsigned int len, i, j;
+  unsigned int header_len, len, i, j;
   unsigned char *line, *ptr;
   struct line_entry **filearr = NULL;
+  struct line_stats line_stats;
   unsigned int *diridx = NULL, *dirarr = NULL;
   unsigned char buf[17];
   int ret = 0;
 
+  line_stats.has_time = line_stats.has_size = false;
   if (line_htab)
     {
       struct line_entry **end;
@@ -14419,7 +14446,8 @@ write_multifile_line (void)
       filearr = (struct line_entry **)
 		obstack_alloc (&ob, filecnt * sizeof (*filearr));
       end = filearr;
-      htab_traverse (line_htab, list_line_entries, (void *) &end);
+      line_stats.end = &end;
+      htab_traverse (line_htab, list_line_entries, (void *) &line_stats);
       assert (filearr + filecnt == end);
       diridx = (unsigned int *)
 	       obstack_alloc (&ob, filecnt * sizeof (*diridx));
@@ -14476,13 +14504,45 @@ write_multifile_line (void)
 	    }
 	  filetbllen += strlen (file) + 1;
 	  filetbllen += size_of_uleb128 (diridx[i]);
-	  filetbllen += size_of_uleb128 (filearr[i]->file->time);
-	  filetbllen += size_of_uleb128 (filearr[i]->file->size);
+	  if (lowest_line_version < 5 || line_stats.has_time)
+	    filetbllen += size_of_uleb128 (filearr[i]->file->time);
+	  if (lowest_line_version < 5 || line_stats.has_size)
+	    filetbllen += size_of_uleb128 (filearr[i]->file->size);
 	}
       dirarr = (unsigned int *) obstack_finish (&ob);
     }
 
-  len = 17 + filetbllen + dirtbllen;
+  /* standard .debug_line "header" length (both version 2 and 5):
+     unit_length (4) + version (2) + header_length (4) +
+     min_instr_length (1) + default_is_stmt (1) + line_base (1) +
+     line_range (1) + opcode_base (1) = 15
+
+     version 2 adds 2 bytes, a zero byte each to terminate dir and file lists.
+
+     version 5 adds at least 11 bytes, max_ops_per_instr (1) +
+     address_size (1) + segment_size (1) + dir_entry_format_cnt (1) +
+     format_pair (2) + file_entry_format_cnt (1) + file_format_pairs
+     (4).  Plus dircnt (uleb128) + format_pair (2) if has_time +
+     format_pair (2) if has_size + filecnt (uleb128).
+
+     version 5 also has 2 extra 6 byte "<dwz>" string entries for dir
+     and file entry zero, plus one for the zero file entry dir idx.
+  */
+  header_len = 15;
+  if (lowest_line_version < 5)
+    header_len += 2;
+  else
+    {
+      header_len += 11;
+      header_len += size_of_uleb128 (dircnt + 1);
+      header_len += size_of_uleb128 (filecnt + 1);
+      if (line_stats.has_time)
+	header_len += 2;
+      if (line_stats.has_size)
+	header_len += 2;
+      header_len += 2 * 6 + 1;
+    }
+  len = header_len + filetbllen + dirtbllen;
   if (unlikely (op_multifile))
     {
       debug_sections[DEBUG_LINE].new_size = len;
@@ -14500,20 +14560,41 @@ write_multifile_line (void)
 	  return 1;
 	}
 
-      if (len == 17)
+      if (len == header_len)
 	line = buf;
       else
 	line = (unsigned char *) obstack_alloc (&ob, len);
     }
   ptr = line;
   write_32 (ptr, len - 4);	/* Total length.  */
-  write_16 (ptr, 2);		/* DWARF version.  */
-  write_32 (ptr, len - 10);	/* Header length.  */
+  if (lowest_line_version < 5)
+    write_16 (ptr, 2);		/* DWARF version.  */
+  else
+    {
+      write_16 (ptr, 5);	/* DWARF version.  */
+      write_8 (ptr, multi_ptr_size);	/* Address size.  */
+      write_8 (ptr, 0);	       	/* Segment size.  */
+    }
+  write_32 (ptr,		/* Header length.  */
+	    len - (lowest_line_version < 5 ? 10 : 12));
   write_8 (ptr, 1);		/* Minimum insn length.  */
+  if (lowest_line_version >= 5)
+    write_8 (ptr, 1);		/* Maximum ops per instr.  */
   write_8 (ptr, 1);		/* Default is_stmt.  */
   write_8 (ptr, 0);		/* Line base.  */
   write_8 (ptr, 1);		/* Line range.  */
   write_8 (ptr, 1);		/* Opcode base.  */
+
+  if (lowest_line_version >= 5)
+    {
+      write_8 (ptr, 1);		/* Dir entry format count.  */
+      write_uleb128 (ptr, DW_LNCT_path);
+      write_uleb128 (ptr, DW_FORM_string);
+      write_uleb128 (ptr, dircnt + 1); /* Dir cnt.  */
+      memcpy (ptr, "<dwz>", 6);	/* Zero entry empty dir path.  */
+      ptr += 6;
+    }
+
   for (i = 0; i < dircnt; i++)
     {
       unsigned int l;
@@ -14535,7 +14616,36 @@ write_multifile_line (void)
 	}
       ptr += l;
     }
-  write_8 (ptr, 0);		/* Terminate dir table.  */
+  if (lowest_line_version < 5)
+    write_8 (ptr, 0);		/* Terminate dir table.  */
+  else
+    {
+      unsigned int format_cnt = 2 + line_stats.has_size + line_stats.has_time;
+      write_8 (ptr, format_cnt);	/* File entry format count.  */
+      write_uleb128 (ptr, DW_LNCT_path);
+      write_uleb128 (ptr, DW_FORM_string);
+      write_uleb128 (ptr, DW_LNCT_directory_index);
+      write_uleb128 (ptr, DW_FORM_udata);
+      if (line_stats.has_time)
+	{
+	  write_uleb128 (ptr, DW_LNCT_timestamp);
+	  write_uleb128 (ptr, DW_FORM_udata);
+	}
+      if (line_stats.has_size)
+	{
+	  write_uleb128 (ptr, DW_LNCT_size);
+	  write_uleb128 (ptr, DW_FORM_udata);
+	}
+      write_uleb128 (ptr, filecnt + 1); /* File names cnt.  */
+      memcpy (ptr, "<dwz>", 6);		/* Zero entry empty file path.  */
+      ptr += 6;
+      write_8 (ptr, 0);	       		/* Zero entry zero diridx.  */
+      if (line_stats.has_time)
+	write_8 (ptr, 0);
+      if (line_stats.has_size)
+	write_8 (ptr, 0);
+    }
+
   for (i = 0; i < filecnt; i++)
     {
       const char *file = filearr[i]->file->file;
@@ -14546,10 +14656,13 @@ write_multifile_line (void)
       memcpy (ptr, file, l);
       ptr += l;
       write_uleb128 (ptr, diridx[i]);
-      write_uleb128 (ptr, filearr[i]->file->time);
-      write_uleb128 (ptr, filearr[i]->file->size);
+      if (lowest_line_version < 5 || line_stats.has_time)
+	write_uleb128 (ptr, filearr[i]->file->time);
+      if (lowest_line_version < 5 || line_stats.has_size)
+	write_uleb128 (ptr, filearr[i]->file->size);
     }
-  write_8 (ptr, 0);		/* Terminate file table.  */
+  if (lowest_line_version < 5)
+    write_8 (ptr, 0);		/* Terminate file table.  */
   assert (ptr == line + len);
 
   if (likely (!op_multifile))
-- 
2.18.4


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] Read DWARF5 .debug_line.
  2020-10-12 19:06 [PATCH 1/2] Read DWARF5 .debug_line Mark Wielaard
  2020-10-12 19:06 ` [PATCH 2/2] Write DWARF5 multifile .debug_line if possible Mark Wielaard
@ 2020-10-13 14:19 ` Jakub Jelinek
  2020-10-13 21:14   ` Mark Wielaard
  1 sibling, 1 reply; 5+ messages in thread
From: Jakub Jelinek @ 2020-10-13 14:19 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: dwz

On Mon, Oct 12, 2020 at 09:06:48PM +0200, Mark Wielaard wrote:
> This handles reading DWARF5 .debug_line just like earlier DWARF versions.
> Sets file time and size to zero when not present. It skips the zero
> file entry (which cannot be referred to from any attribute, since the
> value zero indicates that no source file has been specified), but does
> read the zero dir entry (which can be referred to from the file index).
> It does not yet handle MD5 checksums.
> 
> ChangeLog:
> 
> 	* dwz.c (get_DW_LNCT_str): New function.
> 	(skip_attr_no_dw_form_indirect): Move before read_debug_line.
> 	(read_debug_line): Handle version 5.
> ---
>  dwz.c | 611 ++++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 487 insertions(+), 124 deletions(-)
> 
> diff --git a/dwz.c b/dwz.c
> index 9dbf651..9345e6c 100644
> --- a/dwz.c
> +++ b/dwz.c
> @@ -711,6 +711,28 @@ get_DW_UT_str (unsigned int ut)
>    return buf;
>  }
>  
> +/* Retrun a DW_LNCT_* name.  */

s/Retrun/Return/

> +	    case DW_LNCT_path:
> +	      if (path_ndx != -1)
> +		{
> +		  error (0, 0,
> +			 "%s: .debug_line duplicate file path elements\n",

I think no other error calls have \n at the end of the message in the whole
source (several times in the patch).

Otherwise LGTM.

	Jakub


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] Write DWARF5 multifile .debug_line if possible.
  2020-10-12 19:06 ` [PATCH 2/2] Write DWARF5 multifile .debug_line if possible Mark Wielaard
@ 2020-10-13 14:23   ` Jakub Jelinek
  0 siblings, 0 replies; 5+ messages in thread
From: Jakub Jelinek @ 2020-10-13 14:23 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: dwz

On Mon, Oct 12, 2020 at 09:06:49PM +0200, Mark Wielaard wrote:
> This writes version 5 .debug_line into the multifile if all .debug_line
> segments seen before are also version 5. This is more efficient than
> version 2, if we don't have to write out the time and size of the files.
> But only if there are more than ~25 files because the header is bigger
> and there is some overhead for having to handle zero entry dir and file
> entries (those do have to exist, but are not directly referenced by
> anything).
> 
> ChangeLog:
> 
> 	* dwz.c (lowest_line_version): New static unsigned int, set
> 	to 5 initially.
> 	(read_debug_line): Check version and set lowest_line_version.
> 	(struct line_stats): New.
> 	(list_line_entries): Update data as struct line_stats.
> 	(write_multifile_line): Collect time and size stats and write
> 	out version 5 .debug_line data if lowest_line_version is 5.

LGTM.

	Jakub


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] Read DWARF5 .debug_line.
  2020-10-13 14:19 ` [PATCH 1/2] Read DWARF5 .debug_line Jakub Jelinek
@ 2020-10-13 21:14   ` Mark Wielaard
  0 siblings, 0 replies; 5+ messages in thread
From: Mark Wielaard @ 2020-10-13 21:14 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: dwz

On Tue, Oct 13, 2020 at 04:19:16PM +0200, Jakub Jelinek wrote:
> On Mon, Oct 12, 2020 at 09:06:48PM +0200, Mark Wielaard wrote:
> > +/* Retrun a DW_LNCT_* name.  */
> 
> s/Retrun/Return/

Fixed.

> > +		  error (0, 0,
> > +			 "%s: .debug_line duplicate file path elements\n",
> 
> I think no other error calls have \n at the end of the message in the whole
> source (several times in the patch).

Oops. Yes, you are right error adds a newline itself.  Also removed
the trailing '\n' from error calls in read_loclist_low_mem_phase1,
read_loclist, write_dso, dwz and optimize_multifile.

> Otherwise LGTM.

Thanks, pushed.

Mark

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-10-13 21:14 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-12 19:06 [PATCH 1/2] Read DWARF5 .debug_line Mark Wielaard
2020-10-12 19:06 ` [PATCH 2/2] Write DWARF5 multifile .debug_line if possible Mark Wielaard
2020-10-13 14:23   ` Jakub Jelinek
2020-10-13 14:19 ` [PATCH 1/2] Read DWARF5 .debug_line Jakub Jelinek
2020-10-13 21:14   ` Mark Wielaard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).