[committed] Add --devel-uni-lang

public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed

From: Tom de Vries <tdevries@suse.de>
To: dwz@sourceware.org, jakub@redhat.com
Subject: [committed] Add --devel-uni-lang
Date: Wed, 01 Jan 2020 00:00:00 -0000	[thread overview]
Message-ID: <20200218173703.GA23996@delia> (raw)

Hi,

Add a developer-only option --devel-uni-lang, that forces duplicate chains to
consist of DIEs from CUs with the same language.

Note that when using the option, each PU root DIE has the language attribute
set.  This is done to make multifile work with this option (though the
attribute is also added in regular mode).  Alternatively, we could store that
information in a side table, to be kept in between multifile phases.

We will show here the effect of the option on the cc1 benchmark:
...
$ dwz -lnone cc1 -o 1 --devel-no-uni-lang
$ dwz -lnone cc1 -o 2 --devel-uni-lang
...

The benchmark consists of CUs with the following languages:
...
$ readelf -wi cc1 \
  | grep "DW_AT_language.*:" \
  | awk '{print $5, $6}' \
  | sort \
  | uniq -c
      2 (ANSI C)
     68 (ANSI C99)
    622 (C++)
      3 (MIPS assembler)
...

Doing the same analysis for 1 gives the same results, but for 2 we see the
additional language attributes in the PUs:
...
      2 (ANSI C)
    155 (ANSI C99)
   2753 (C++)
      3 (MIPS assembler)
...

The size effect for this example seems to be in the noise range:
...
$ diff.sh cc1 1
.debug_info      red: 44.84%    111527248  61527733
.debug_abbrev    red: 40.28%    1722726  1028968
.debug_str       red: 0%        6609355  6609355
total            red: 42.30%    119859329 69166056
$ diff.sh cc1 2
.debug_info      red: 44.84%    111527248  61521767
.debug_abbrev    red: 39.99%    1722726  1033916
.debug_str       red: 0%        6609355  6609355
total            red: 42.30%    119859329 69165038
...

Tested on-by-default with gdb testsuite using board cc-with-dwz and
cc-with-dwz-m.

Committed to trunk.

Thanks,
- Tom

Add --devel-uni-lang

2020-02-18  Tom de Vries  <tdevries@suse.de>

	* dwz.c (uni_lang_p): New var.
	(checksum_die, die_eq_1): Handle uni_lang_p.
	(read_debug_info): Ensure that the language attribute is read for
	uni_lang_p, and for PUs.
	(nr_bytes_for): New function.
	(partition_dups_1): Handle uni_lang_p in heuristics.
	(build_abbrevs_for_die): Add language attribute.
	(write_unit_die): Handle language attribute.
	(dwz_options, usage): Add --devel-uni-lang/--devel-no-uni-lang entries.

---
 dwz.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 75 insertions(+), 6 deletions(-)

diff --git a/dwz.c b/dwz.c
index 4bcd3c7..344e1ee 100644
--- a/dwz.c
+++ b/dwz.c
@@ -219,6 +219,7 @@ enum deduplication_mode
   dm_inter_cu
 };
 static enum deduplication_mode deduplication_mode = dm_inter_cu;
+static int uni_lang_p = 0;
 enum die_count_methods
 {
   none,
@@ -2688,6 +2689,8 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
   skip_leb128 (ptr);
   s = die->die_tag;
   die->u.p1.die_hash = iterative_hash_object (s, die->u.p1.die_hash);
+  if (uni_lang_p && die == top_die)
+    die->u.p1.die_hash = iterative_hash_object (cu->lang, die->u.p1.die_hash);
   only_hash_name_p = odr && die_odr_state (die_cu (die), die) != ODR_NONE;
   die_hash2 = 0;
   if (only_hash_name_p)
@@ -3816,6 +3819,10 @@ die_eq_1 (dw_cu_ref cu1, dw_cu_ref cu2,
     return 0;
   assert (!die1->die_root && !die2->die_root);
 
+  if (uni_lang_p && die1 == top_die1 && die2 == top_die2
+      && cu1->lang != cu2->lang)
+    return 0;
+
   only_compare_name_p
     = odr && die1->die_odr_state != ODR_NONE && die2->die_odr_state != ODR_NONE;
 
@@ -5698,6 +5705,7 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count)
   bool low_mem_phase1 = low_mem && kind == DEBUG_INFO;
   struct dw_cu cu_buf;
   struct dw_die die_buf;
+  bool lang_p = odr || uni_lang_p;
 
   odr_active_p = false;
   if (odr)
@@ -6070,7 +6078,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count)
 		case DW_FORM_flag_present:
 		  break;
 		case DW_FORM_data1:
-		  if (odr && die->die_tag == DW_TAG_compile_unit
+		  if (lang_p
+		      && (die->die_tag == DW_TAG_compile_unit
+			  || die->die_tag == DW_TAG_partial_unit)
 		      && t->attr[i].attr == DW_AT_language)
 		    cu->lang = *ptr;
 		  /* FALLTHRU */
@@ -6079,7 +6089,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count)
 		  ++ptr;
 		  break;
 		case DW_FORM_data2:
-		  if (odr && die->die_tag == DW_TAG_compile_unit
+		  if (lang_p
+		      && (die->die_tag == DW_TAG_compile_unit
+			  || die->die_tag == DW_TAG_partial_unit)
 		      && t->attr[i].attr == DW_AT_language)
 		    cu->lang = do_read_16 (ptr);
 		  /* FALLTHRU */
@@ -6087,7 +6099,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count)
 		  ptr += 2;
 		  break;
 		case DW_FORM_data4:
-		  if (odr && die->die_tag == DW_TAG_compile_unit
+		  if (lang_p
+		      && (die->die_tag == DW_TAG_compile_unit
+			  || die->die_tag == DW_TAG_partial_unit)
 		      && t->attr[i].attr == DW_AT_language)
 		    read_lang (ptr, form, &cu->lang);
 		  /* FALLTHRU */
@@ -6096,7 +6110,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count)
 		  ptr += 4;
 		  break;
 		case DW_FORM_data8:
-		  if (odr && die->die_tag == DW_TAG_compile_unit
+		  if (lang_p
+		      && (die->die_tag == DW_TAG_compile_unit
+			  || die->die_tag == DW_TAG_partial_unit)
 		      && t->attr[i].attr == DW_AT_language)
 		    read_lang (ptr, form, &cu->lang);
 		  /* FALLTHRU */
@@ -6106,7 +6122,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count)
 		  break;
 		case DW_FORM_sdata:
 		case DW_FORM_udata:
-		  if (odr && die->die_tag == DW_TAG_compile_unit
+		  if (lang_p
+		      && (die->die_tag == DW_TAG_compile_unit
+			  || die->die_tag == DW_TAG_partial_unit)
 		      && t->attr[i].attr == DW_AT_language)
 		    {
 		      ptr = read_lang (ptr, form, &cu->lang);
@@ -6870,6 +6888,21 @@ copy_die_tree (dw_die_ref parent, dw_die_ref die)
   return new_die;
 }
 
+/* Return the minimal number of bytes (at least 1) needed to encode VAL.  */
+static unsigned int
+nr_bytes_for (uint64_t val)
+{
+  unsigned int n;
+
+  if (val == 0)
+    return 1;
+
+  for (n = 0; val > 0; n++)
+    val = val >> 8;
+
+  return n;
+}
+
 /* Helper function of partition_dups_1.  Decide what DIEs matching in
    multiple CUs might be worthwhile to be moved into partial units,
    construct those partial units.  */
@@ -7024,6 +7057,9 @@ partition_dups_1 (dw_die_ref *arr, size_t vec_size,
 	      DW_FORM_string: 1 or more bytes.
 	      Assume 4 bytes.  */
 	   + 4
+	  /* CU Root DIE: DW_AT_language (constant).
+	     1 or 2 bytes.  */
+	   + (uni_lang_p ? nr_bytes_for (die_cu (arr[i])->lang) : 0)
 	   /* CU root DIE children terminator: abbreviation code 0
 					       (unsigned LEB128).
 	      1 byte.  */
@@ -7068,6 +7104,8 @@ partition_dups_1 (dw_die_ref *arr, size_t vec_size,
 	  partial_cu->cu_offset = *last_partial_cu == NULL
 				  ? 0 : (*last_partial_cu)->cu_offset + 1;
 	  partial_cu->cu_version = refcu->cu_version;
+	  if (uni_lang_p)
+	    partial_cu->lang = refcu->lang;
 	  if (*first_partial_cu == NULL)
 	    *first_partial_cu = *last_partial_cu = partial_cu;
 	  else
@@ -9847,6 +9885,24 @@ build_abbrevs_for_die (htab_t h, dw_cu_ref cu, dw_die_ref die,
 	    die->die_size += 4;
 	    t->nattr++;
 	  }
+	if (uni_lang_p)
+	  {
+	    unsigned int lang_size = nr_bytes_for (cu->lang);
+	    die->die_size += lang_size;
+	    t->attr[t->nattr].attr = DW_AT_language;
+	    switch (lang_size)
+	      {
+	      case 1:
+		t->attr[t->nattr].form = DW_FORM_data1;
+		break;
+	      case 2:
+		t->attr[t->nattr].form = DW_FORM_data2;
+		break;
+	      default:
+		abort ();
+	      }
+	    t->nattr++;
+	  }
 	if (refcu->cu_comp_dir)
 	  {
 	    enum dwarf_form form;
@@ -10977,6 +11033,14 @@ write_unit_die (unsigned char *ptr, dw_die_ref die, dw_die_ref origin)
 	      }
 	  }
 	  break;
+	case DW_AT_language:
+	  {
+	    enum dwarf_source_language lang = die_cu (die)->lang;
+	    unsigned int lang_size = nr_bytes_for (lang);
+	    write_size (ptr, lang_size, lang);
+	    ptr += lang_size;
+	  }
+	  break;
 	default:
 	  assert (false);
 	  break;
@@ -14648,6 +14712,10 @@ static struct option dwz_options[] =
   { "devel-stats",	 no_argument,	    &stats_p, 1 },
   { "devel-deduplication-mode",
 			 required_argument, &deduplication_mode_parsed, 1 },
+  { "devel-uni-lang",
+			 no_argument,	    &uni_lang_p, 1 },
+  { "devel-no-uni-lang",
+			 no_argument,	    &uni_lang_p, 0 },
 #endif
   { "odr",		 no_argument,	    &odr, 1 },
   { "no-odr",		 no_argument,	    &odr, 0 },
@@ -14897,7 +14965,8 @@ usage (void)
        "  --devel-dump-edges\n"
        "  --devel-partition-dups-opt\n"
        "  --devel-die-count-method\n"
-       "  --devel-deduplication-mode={none,intra-cu,inter-cu}\n");
+       "  --devel-deduplication-mode={none,intra-cu,inter-cu}\n"
+       "  --devel-uni-lang / --devel-no-uni-lang\n");
   fprintf (stderr, "%s", msg);
 #endif

                 reply	other threads:[~2020-02-18 17:37 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200218173703.GA23996@delia \
    --to=tdevries@suse.de \
    --cc=dwz@sourceware.org \
    --cc=jakub@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).