public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [committed] Cache file name property in struct dw_file
@ 2020-01-01  0:00 Tom de Vries
  0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2020-01-01  0:00 UTC (permalink / raw)
  To: dwz, jakub

Hi,

When profiling with benchmark clang-10, we observe a hotspot in the strlen
call in die_eq_1:
...
       │                    file_len = strlen (cu_file1->file);
       │        or     $0xffffffffffffffff,%rcx
  0,01 │        mov    %r15,%rdi
       │                    if (cu_file1->dir != NULL)
  0,01 │        mov    0x0(%r13),%r13
       │                    file_len = strlen (cu_file1->file);
 26,24 │        repnz  scas %es:(%rdi),%al
  3,16 │        mov    0x58(%rsp),%r10
...
That is, 29.4% of the time spent in die_eq_1 is spent in this strlen call
(while 23.10% of the overall time is spent in die_eq_1).

The file_len variable is used to calculate a more complex property of the
filename:
...
              else if (cu_file1->file[0] == '<'
                       && cu_file1->file[file_len - 1] == '>'
                       && strchr (cu_file1->file, '/') == NULL
...
which is also calculated in checksum_die.

Cache the property in a new struct dw_file field named
file_angle_brackets_encapsulated_no_slash, and use it in both checksum_die
and die_eq_1.

This speeds up processing of clang-10 by ~6.5%:
...
real:  mean:  263942.00  100.00%  stddev:  1558.78
       mean:  247611.33   93.81%  stddev:   687.17
user:  mean:  253755.33  100.00%  stddev:   447.36
       mean:  236928.33   93.37%  stddev:  1721.72
sys:   mean:    5296.00  100.00%  stddev:   820.97
       mean:    5396.67  101.90%  stddev:  1013.13
...

In particular, with --devel-progress we observe going from:
...
partition_dups split_dups
user: 94.50
sys : 0.16
...
to:
...
partition_dups split_dups
user: 74.44
sys : 0.01
...
which is a reduction of 21.2%.

Committed to trunk.

Thanks,
- Tom

Cache file name property in struct dw_file

2020-01-07  Tom de Vries  <tdevries@suse.de>

	* dwz.c (struct dw_file): Add field
	file_angle_brackets_encapsulated_no_slash.
	(read_debug_line): Initialize new field.
	(checksum_die, die_eq_1): Use new field.

---
 dwz.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/dwz.c b/dwz.c
index f4ebd22..8fac945 100644
--- a/dwz.c
+++ b/dwz.c
@@ -681,6 +681,7 @@ struct dw_file
   char *dir;
   char *file;
   uint64_t time, size;
+  unsigned int file_angle_brackets_encapsulated_no_slash : 1;
 };
 
 /* Internal representation of a compilation (or partial)
@@ -1230,17 +1231,24 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
       cu->cu_files[file_cnt].dir = (char *) dirt[value];
       cu->cu_files[file_cnt].time = read_uleb128 (ptr);
       cu->cu_files[file_cnt].size = read_uleb128 (ptr);
+      size_t file_len = (char *) end - cu->cu_files[file_cnt].file;
+      size_t strlen_file = file_len - 1;
+      bool file_has_slash = false;
       if (cu->cu_files[file_cnt].file[0] != '/'
 	  && cu->cu_files[file_cnt].dir != NULL)
 	{
-	  size_t file_len = (char *) end - cu->cu_files[file_cnt].file;
 	  size_t dir_len = strlen (cu->cu_files[file_cnt].dir);
 	  if (dir_len)
 	    {
 	      obstack_grow (&ob, cu->cu_files[file_cnt].dir,
 			    dir_len);
+	      strlen_file += dir_len;
 	      if (cu->cu_files[file_cnt].dir[dir_len - 1] != '/')
-		obstack_1grow (&ob, '/');
+		{
+		  obstack_1grow (&ob, '/');
+		  strlen_file++;
+		}
+	      file_has_slash = true;
 	      obstack_grow (&ob, cu->cu_files[file_cnt].file,
 			    file_len);
 	      cu->cu_files[file_cnt].file
@@ -1248,6 +1256,11 @@ read_debug_line (DSO *dso, dw_cu_ref cu, uint32_t off)
 	      cu->cu_files[file_cnt].dir = NULL;
 	    }
 	}
+      cu->cu_files[file_cnt].file_angle_brackets_encapsulated_no_slash
+	= (!file_has_slash
+	   && cu->cu_files[file_cnt].file[0] == '<'
+	   && cu->cu_files[file_cnt].file[strlen_file - 1] == '>'
+	   && strchr (cu->cu_files[file_cnt].file, '/') == NULL);
       file_cnt++;
     }
 
@@ -2620,9 +2633,7 @@ checksum_die (DSO *dso, dw_cu_ref cu, dw_die_ref top_die, dw_die_ref die)
 					die->u.p1.die_hash);
 		  /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
 		     etc. if immediately followed by DW_AT_*_line 0.  */
-		  else if (cu_file->file[0] == '<'
-			   && cu_file->file[file_len - 1] == '>'
-			   && strchr (cu_file->file, '/') == NULL
+		  else if (cu_file->file_angle_brackets_encapsulated_no_slash
 			   && i + 1 < t->nattr
 			   && t->attr[i + 1].attr
 			      == (t->attr[i].attr == DW_AT_decl_file
@@ -3833,14 +3844,12 @@ die_eq_1 (dw_cu_ref cu1, dw_cu_ref cu2,
 		= &cu1->cu_files[value1 - 1];
 	      struct dw_file *cu_file2
 		= &cu2->cu_files[value2 - 1];
-	      unsigned int file_len;
 
 	      if (cu_file1->time != cu_file2->time
 		  || cu_file1->size != cu_file2->size
 		  || strcmp (cu_file1->file, cu_file2->file))
 		FAIL;
 
-	      file_len = strlen (cu_file1->file);
 	      if (cu_file1->dir != NULL)
 		{
 		  if (cu_file2->dir == NULL
@@ -3851,9 +3860,7 @@ die_eq_1 (dw_cu_ref cu1, dw_cu_ref cu2,
 		FAIL;
 	      /* Ignore DW_AT_comp_dir for DW_AT_*_file <built-in>
 		 etc. if immediately followed by DW_AT_*_line 0.  */
-	      else if (cu_file1->file[0] == '<'
-		       && cu_file1->file[file_len - 1] == '>'
-		       && strchr (cu_file1->file, '/') == NULL
+	      else if (cu_file1->file_angle_brackets_encapsulated_no_slash
 		       && i + 1 < t1->nattr
 		       && j + 1 < t2->nattr
 		       && t1->attr[i + 1].attr

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-01-07 14:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-01-01  0:00 [committed] Cache file name property in struct dw_file Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).