public inbox for dwz@sourceware.org
 help / color / mirror / Atom feed
* [RFC] Allow parallel multifile with -p -e
@ 2021-03-26 16:40 Tom de Vries
  2021-03-26 16:47 ` Jakub Jelinek
  0 siblings, 1 reply; 4+ messages in thread
From: Tom de Vries @ 2021-03-26 16:40 UTC (permalink / raw)
  To: dwz, jakub, mark

Hi,

Currently, parallel dwz is disabled when multifile is used:
...
$ dwz -m 5 3 1 2 4 -j 4
...

Enable this when the multifile parameter characteristics are specified using
-p and -e:
...
$ dwz -m 5 3 1 2 4 -j 4 -p 8 -e l
...
This works around the child processes having to communicate back to the parent
the found pointer size and endianness, and doing the -p auto and -e auto
consistency checking.

So let's compare the output:
...
$ mkdir j1 j4
$ dwz -m 5 3 1 2 4 -j 1 -p 8 -e l
$ cp 1 2 3 4 5 j1
$ dwz -m 5 3 1 2 4 -j 4 -p 8 -e l
$ cp 1 2 3 4 5 j4
...

This gives us reproducible compression:
...
$ ls -la j1/*
-rwxr-xr-x 1 vries users  11432 Mar 26 17:16 j1/1
-rwxr-xr-x 1 vries users  11432 Mar 26 17:16 j1/2
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j1/3
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j1/4
-rw-r--r-- 1 vries users  64543 Mar 26 17:16 j1/5
$ ls -la j4/*
-rwxr-xr-x 1 vries users  11432 Mar 26 17:16 j4/1
-rwxr-xr-x 1 vries users  11432 Mar 26 17:16 j4/2
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j4/3
-rwxr-xr-x 1 vries users 807376 Mar 26 17:16 j4/4
-rw-r--r-- 1 vries users  64543 Mar 26 17:16 j4/5
...

But it doesn't give reproducible results:
...
$ md5sum j1/*
e6e655f7b5d1078672c8b0da99ab8c41  j1/1
e6e655f7b5d1078672c8b0da99ab8c41  j1/2
d833aa3ad6ad35597e1b7d0635b401cf  j1/3
d833aa3ad6ad35597e1b7d0635b401cf  j1/4
d5282aa9d065f1d00fd7a46c54ebde8d  j1/5
$ md5sum j4/*
de1645ce60bba6f345b2334825deb01f  j4/1
de1645ce60bba6f345b2334825deb01f  j4/2
ac2f16c50cf3d31be1f42f35ced4a091  j4/3
ac2f16c50cf3d31be1f42f35ced4a091  j4/4
7fc3cd2c2514c8bf1f23348a27025b8d  j4/5
...

The temporary multifile section contributions happen in random
order, so consequently the multifile layout will be different, and the
files referring to the multifile will be different.

Any comments?

Thanks,
- Tom

Allow parallel multifile with -p -e

2021-03-26  Tom de Vries  <tdevries@suse.de>

	PR dwz/25951
	* args.c (parse_args): Allow max_forks > 1 in combination with
	multifile, provided -p and -e are used.
	* dwz.c (write_multifile): Lock multi_info_fd before use.  Refresh
	multi_*_off.
	(encode_child_exit_status, decode_child_exit_status): Handle
	skip_multifile.
	(dwz_files_1): Allow max_forks > 1 in combination with
	multifile.

---
 args.c |  4 ++++
 dwz.c  | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/args.c b/args.c
index d44e632..67301e9 100644
--- a/args.c
+++ b/args.c
@@ -708,4 +708,8 @@ parse_args (int argc, char *argv[], bool *hardlink, const char **outfile)
 	 threads with only 2 cores.  */
       max_forks = nprocs / 2;
     }
+
+  if (max_forks > 1 && multifile
+      && multifile_force_ptr_size == 0 && multifile_force_endian == 0)
+    max_forks = 0;
 }
diff --git a/dwz.c b/dwz.c
index 1be4f2a..2e85861 100644
--- a/dwz.c
+++ b/dwz.c
@@ -15068,6 +15068,17 @@ write_multifile (DSO *dso, struct file_result *res)
   unsigned int i;
   int ret = 0;
 
+  if (max_forks > 1)
+    {
+      lockf (multi_info_fd, F_LOCK, 0);
+
+      multi_info_off = lseek (multi_info_fd, 0L, SEEK_END);
+      multi_abbrev_off = lseek (multi_abbrev_fd, 0L, SEEK_END);
+      multi_line_off = lseek (multi_line_fd, 0L, SEEK_END);
+      multi_str_off = lseek (multi_str_fd, 0L, SEEK_END);
+      multi_macro_off = lseek (multi_macro_fd, 0L, SEEK_END);
+    }
+
   if (unlikely (progress_p))
     {
       report_progress ();
@@ -15091,6 +15102,8 @@ write_multifile (DSO *dso, struct file_result *res)
       error (0, 0, "Multi-file optimization not allowed for different"
 	     " pointer sizes");
       multifile = NULL;
+      if (max_forks > 1)
+	lockf (multi_info_fd, F_ULOCK, 0);
       return 1;
     }
   else
@@ -15229,6 +15242,8 @@ write_multifile (DSO *dso, struct file_result *res)
       debug_sections[i].new_size = saved_new_size[i];
       saved_new_data[i] = NULL;
     }
+  if (max_forks > 1)
+    lockf (multi_info_fd, F_ULOCK, 0);
   return ret;
 }
 
@@ -16410,12 +16425,13 @@ update_hardlinks (int nr_files, char *files[], struct file_result *resa)
 static int
 encode_child_exit_status (int thisret, struct file_result *res)
 {
+  assert (thisret == 0 ||  thisret == 1);
   if (thisret == 0 && res->low_mem_p)
     thisret = 2;
-  assert (thisret >= 0 && thisret <= 2);
-  assert (res->res >= -3);
-  thisret = thisret + ((res->res + 3) << 2);
-  return thisret;
+  assert (res->res >= -3 && res->res <= 1);
+  return (thisret
+	  + ((res->res + 3) << 2)
+	  + ((res->skip_multifile ? 1 : 0) << 5));
 }
 
 /* Decode child process exit status.  */
@@ -16425,14 +16441,21 @@ decode_child_exit_status (int state, struct file_result *res)
   int ret;
   if (!WIFEXITED (state))
     error (1, 0, "Child dwz process got killed");
-  ret = WEXITSTATUS (state) & 0x3;
+  int status = WEXITSTATUS (state);
+  ret = status & 0x3;
+  status >>= 2;
+
   res->low_mem_p = false;
   if (ret == 2)
     {
       ret = 0;
       res->low_mem_p = true;
     }
-  res->res = (int)((WEXITSTATUS (state) & ~0x3) >> 2) - 3;
+
+  res->res = (int)(status & 0x7) - 3;
+  status >>= 3;
+
+  res->skip_multifile = (status & 0x1) ? true : false;
 
   return ret;
 }
@@ -16473,7 +16496,7 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
     hardlink = detect_hardlinks (nr_files, files, resa);
 
   int nr_forks = 0;
-  if (max_forks > 1 && multifile == NULL)
+  if (max_forks > 1)
     {
       pid_t pids[nr_files];
       for (i = 0; i < nr_files; i++)
@@ -16493,6 +16516,8 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
 		= decode_child_exit_status (state, res);
 	      if (thisret == 1)
 		ret = 1;
+	      else if (!res->low_mem_p && !res->skip_multifile && res->res >= 0)
+		successcount++;
 	      nr_forks--;
 	      int j;
 	      for (j = 0; j < i; ++j)
@@ -16533,6 +16558,8 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
 	  thisret = decode_child_exit_status (state, res);
 	  if (thisret == 1)
 	    ret = 1;
+	  else if (!res->low_mem_p && !res->skip_multifile && res->res >= 0)
+	    successcount++;
 	}
     }
   else
@@ -16567,6 +16594,14 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
       return ret;
     }
 
+  if (max_forks > 1)
+    {
+      multi_info_off = lseek (multi_info_fd, 0L, SEEK_END);
+      multi_abbrev_off = lseek (multi_abbrev_fd, 0L, SEEK_END);
+      multi_line_off = lseek (multi_line_fd, 0L, SEEK_END);
+      multi_str_off = lseek (multi_str_fd, 0L, SEEK_END);
+      multi_macro_off = lseek (multi_macro_fd, 0L, SEEK_END);
+    }
   if (multi_info_off == 0 && multi_str_off == 0 && multi_macro_off == 0)
     {
       if (!quiet)
@@ -16574,6 +16609,32 @@ dwz_files_1 (int nr_files, char *files[], bool hardlink,
       return ret;
     }
 
+  if (max_forks > 1)
+    {
+      for (i = 0; i < nr_files; i++)
+	{
+	  struct file_result *res = &resa[i];
+	  if (!res->low_mem_p && !res->skip_multifile && res->res >= 0)
+	    {
+	      int fd = open (files[i], O_RDONLY);
+	      if (fd < 0)
+		return ret;
+	      DSO *dso = fdopen_dso (fd, files[i]);
+	      if (dso == NULL)
+		{
+		  close (fd);
+		  return ret;
+		}
+	      assert (multi_ehdr.e_ident[0] == '\0');
+	      multi_ehdr = dso->ehdr;
+	      break;
+	    }
+	}
+
+      multi_ptr_size = multifile_force_ptr_size;
+      multi_endian = multifile_force_endian;
+    }
+
   unsigned int multifile_die_count = 0;
   int multi_fd = optimize_multifile (&multifile_die_count);
   DSO *dso;

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-03-30  9:42 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-26 16:40 [RFC] Allow parallel multifile with -p -e Tom de Vries
2021-03-26 16:47 ` Jakub Jelinek
2021-03-26 16:55   ` Tom de Vries
2021-03-30  9:42   ` [PATCH] " Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).