From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 23869 invoked by alias); 18 Feb 2020 17:37:11 -0000 Mailing-List: contact dwz-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Post: List-Help: List-Subscribe: Sender: dwz-owner@sourceware.org Received: (qmail 23860 invoked by uid 89); 18 Feb 2020 17:37:11 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Checked: by ClamAV 0.100.3 on sourceware.org X-Virus-Found: No X-Spam-SWARE-Status: No, score=-25.1 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS autolearn=ham version=3.3.1 spammy=phases X-Spam-Status: No, score=-25.1 required=5.0 tests=AWL,BAYES_00,GIT_PATCH_0,GIT_PATCH_1,GIT_PATCH_2,GIT_PATCH_3,SPF_PASS autolearn=ham version=3.3.1 X-Spam-Checker-Version: SpamAssassin 3.3.1 (2010-03-16) on sourceware.org X-Spam-Level: X-HELO: mx2.suse.de X-Virus-Scanned: by amavisd-new at test-mx.suse.de Date: Wed, 01 Jan 2020 00:00:00 -0000 From: Tom de Vries To: dwz@sourceware.org, jakub@redhat.com Subject: [committed] Add --devel-uni-lang Message-ID: <20200218173703.GA23996@delia> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.10.1 (2018-07-13) X-SW-Source: 2020-q1/txt/msg00078.txt Hi, Add a developer-only option --devel-uni-lang that forces duplicate chains to consist of DIEs from CUs with the same language. Note that when using the option, each PU root DIE has the language attribute set. This is done to make multifile work with this option (though the attribute is also added in regular mode). Alternatively, we could store that information in a side table, to be kept in between multifile phases. We will show here the effect of the option on the cc1 benchmark: ... $ dwz -lnone cc1 -o 1 --devel-no-uni-lang $ dwz -lnone cc1 -o 2 --devel-uni-lang ... The benchmark consists of CUs with the following languages: ... 
$ readelf -wi cc1 \ | grep "DW_AT_language.*:" \ | awk '{print $5, $6}' \ | sort \ | uniq -c 2 (ANSI C) 68 (ANSI C99) 622 (C++) 3 (MIPS assembler) ... Doing the same analysis for output file 1 gives the same results, but for output file 2 we see the additional language attributes in the PUs: ... 2 (ANSI C) 155 (ANSI C99) 2753 (C++) 3 (MIPS assembler) ... The size effect for this example seems to be in the noise range: ... $ diff.sh cc1 1 .debug_info red: 44.84% 111527248 61527733 .debug_abbrev red: 40.28% 1722726 1028968 .debug_str red: 0% 6609355 6609355 total red: 42.30% 119859329 69166056 $ diff.sh cc1 2 .debug_info red: 44.84% 111527248 61521767 .debug_abbrev red: 39.99% 1722726 1033916 .debug_str red: 0% 6609355 6609355 total red: 42.30% 119859329 69165038 ... Tested on-by-default with gdb testsuite using boards cc-with-dwz and cc-with-dwz-m. Committed to trunk. Thanks, - Tom Add --devel-uni-lang 2020-02-18 Tom de Vries * dwz.c (uni_lang_p): New var. (checksum_die, die_eq_1): Handle uni_lang_p. (read_debug_info): Ensure that the language attribute is read for uni_lang_p, and for PUs. (nr_bytes_for): New function. (partition_dups_1): Handle uni_lang_p in heuristics. (build_abbrevs_for_die): Add language attribute. (write_unit_die): Handle language attribute. (dwz_options, usage): Add --devel-uni-lang/--devel-no-uni-lang entries. 
--- dwz.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/dwz.c b/dwz.c index 4bcd3c7..344e1ee 100644 --- a/dwz.c +++ b/dwz.c @@ -219,6 +219,7 @@ enum deduplication_mode dm_inter_cu }; static enum deduplication_mode deduplication_mode = dm_inter_cu; +static int uni_lang_p = 0; enum die_count_methods { none, @@ -2688,6 +2689,8 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die) skip_leb128 (ptr); s = die->die_tag; die->u.p1.die_hash = iterative_hash_object (s, die->u.p1.die_hash); + if (uni_lang_p && die == top_die) + die->u.p1.die_hash = iterative_hash_object (cu->lang, die->u.p1.die_hash); only_hash_name_p = odr && die_odr_state (die_cu (die), die) != ODR_NONE; die_hash2 = 0; if (only_hash_name_p) @@ -3816,6 +3819,10 @@ die_eq_1 (dw_cu_ref cu1, dw_cu_ref cu2, return 0; assert (!die1->die_root && !die2->die_root); + if (uni_lang_p && die1 == top_die1 && die2 == top_die2 + && cu1->lang != cu2->lang) + return 0; + only_compare_name_p = odr && die1->die_odr_state != ODR_NONE && die2->die_odr_state != ODR_NONE; @@ -5698,6 +5705,7 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count) bool low_mem_phase1 = low_mem && kind == DEBUG_INFO; struct dw_cu cu_buf; struct dw_die die_buf; + bool lang_p = odr || uni_lang_p; odr_active_p = false; if (odr) @@ -6070,7 +6078,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count) case DW_FORM_flag_present: break; case DW_FORM_data1: - if (odr && die->die_tag == DW_TAG_compile_unit + if (lang_p + && (die->die_tag == DW_TAG_compile_unit + || die->die_tag == DW_TAG_partial_unit) && t->attr[i].attr == DW_AT_language) cu->lang = *ptr; /* FALLTHRU */ @@ -6079,7 +6089,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count) ++ptr; break; case DW_FORM_data2: - if (odr && die->die_tag == DW_TAG_compile_unit + if (lang_p + && (die->die_tag == DW_TAG_compile_unit + || die->die_tag == DW_TAG_partial_unit) 
&& t->attr[i].attr == DW_AT_language) cu->lang = do_read_16 (ptr); /* FALLTHRU */ @@ -6087,7 +6099,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count) ptr += 2; break; case DW_FORM_data4: - if (odr && die->die_tag == DW_TAG_compile_unit + if (lang_p + && (die->die_tag == DW_TAG_compile_unit + || die->die_tag == DW_TAG_partial_unit) && t->attr[i].attr == DW_AT_language) read_lang (ptr, form, &cu->lang); /* FALLTHRU */ @@ -6096,7 +6110,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count) ptr += 4; break; case DW_FORM_data8: - if (odr && die->die_tag == DW_TAG_compile_unit + if (lang_p + && (die->die_tag == DW_TAG_compile_unit + || die->die_tag == DW_TAG_partial_unit) && t->attr[i].attr == DW_AT_language) read_lang (ptr, form, &cu->lang); /* FALLTHRU */ @@ -6106,7 +6122,9 @@ read_debug_info (DSO *dso, int kind, unsigned int *die_count) break; case DW_FORM_sdata: case DW_FORM_udata: - if (odr && die->die_tag == DW_TAG_compile_unit + if (lang_p + && (die->die_tag == DW_TAG_compile_unit + || die->die_tag == DW_TAG_partial_unit) && t->attr[i].attr == DW_AT_language) { ptr = read_lang (ptr, form, &cu->lang); @@ -6870,6 +6888,21 @@ copy_die_tree (dw_die_ref parent, dw_die_ref die) return new_die; } +/* Return how many bytes we need to encode VAL. */ +static unsigned int +nr_bytes_for (uint64_t val) +{ + unsigned int n; + + if (val == 0) + return 1; + + for (n = 0; val > 0; n++) + val = val >> 8; + + return n; +} + /* Helper function of partition_dups_1. Decide what DIEs matching in multiple CUs might be worthwhile to be moved into partial units, construct those partial units. */ @@ -7024,6 +7057,9 @@ partition_dups_1 (dw_die_ref *arr, size_t vec_size, DW_FORM_string: 1 or more bytes. Assume 4 bytes. */ + 4 + /* CU Root DIE: DW_AT_language (constant). + 1 or 2 bytes. */ + + (uni_lang_p ? nr_bytes_for (die_cu (arr[i])->lang) : 0) /* CU root DIE children terminator: abbreviation code 0 (unsigned LEB128). 1 byte. 
*/ @@ -7068,6 +7104,8 @@ partition_dups_1 (dw_die_ref *arr, size_t vec_size, partial_cu->cu_offset = *last_partial_cu == NULL ? 0 : (*last_partial_cu)->cu_offset + 1; partial_cu->cu_version = refcu->cu_version; + if (uni_lang_p) + partial_cu->lang = refcu->lang; if (*first_partial_cu == NULL) *first_partial_cu = *last_partial_cu = partial_cu; else @@ -9847,6 +9885,24 @@ build_abbrevs_for_die (htab_t h, dw_cu_ref cu, dw_die_ref die, die->die_size += 4; t->nattr++; } + if (uni_lang_p) + { + unsigned int lang_size = nr_bytes_for (cu->lang); + die->die_size += lang_size; + t->attr[t->nattr].attr = DW_AT_language; + switch (lang_size) + { + case 1: + t->attr[t->nattr].form = DW_FORM_data1; + break; + case 2: + t->attr[t->nattr].form = DW_FORM_data2; + break; + default: + abort (); + } + t->nattr++; + } if (refcu->cu_comp_dir) { enum dwarf_form form; @@ -10977,6 +11033,14 @@ write_unit_die (unsigned char *ptr, dw_die_ref die, dw_die_ref origin) } } break; + case DW_AT_language: + { + enum dwarf_source_language lang = die_cu (die)->lang; + unsigned int lang_size = nr_bytes_for (lang); + write_size (ptr, lang_size, lang); + ptr += lang_size; + } + break; default: assert (false); break; @@ -14648,6 +14712,10 @@ static struct option dwz_options[] = { "devel-stats", no_argument, &stats_p, 1 }, { "devel-deduplication-mode", required_argument, &deduplication_mode_parsed, 1 }, + { "devel-uni-lang", + no_argument, &uni_lang_p, 1 }, + { "devel-no-uni-lang", + no_argument, &uni_lang_p, 0 }, #endif { "odr", no_argument, &odr, 1 }, { "no-odr", no_argument, &odr, 0 }, @@ -14897,7 +14965,8 @@ usage (void) " --devel-dump-edges\n" " --devel-partition-dups-opt\n" " --devel-die-count-method\n" - " --devel-deduplication-mode={none,intra-cu,inter-cu}\n"); + " --devel-deduplication-mode={none,intra-cu,inter-cu}\n" + " --devel-uni-lang / --devel-no-uni-lang\n"); fprintf (stderr, "%s", msg); #endif