public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
* patch to commit soon: PR25375 debuginfod prefetching
@ 2020-02-24 22:29 Frank Ch. Eigler
  2020-02-25 11:32 ` Mark Wielaard
  0 siblings, 1 reply; 3+ messages in thread
From: Frank Ch. Eigler @ 2020-02-24 22:29 UTC (permalink / raw)
  To: elfutils-devel

Hi -

This patch has been baking on my public servers awhile and can make a
huge difference in performance.  It's not something immediately
obvious how or whether to test, as it's a pure performance improvement.
Planning to push shortly.

commit ae8b89716116a6df124f8700f77460b5e97c12c4 (origin/fche/pr25375)
Author: Frank Ch. Eigler <fche@redhat.com>
Date:   Sat Jan 25 18:43:07 2020 -0500

    PR25375: fdcache prefetching to reduce repeated archive decompression

diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 0acd70e4a916..7f432d7d9c41 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -355,6 +355,8 @@ static const struct argp_option options[] =
    { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
 #define ARGP_KEY_FDCACHE_MBS 0x1002
    { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
+   { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
    { NULL, 0, NULL, 0, NULL, 0 }
   };
 
@@ -394,6 +396,7 @@ static regex_t file_exclude_regex;
 static bool traverse_logical;
 static long fdcache_fds;
 static long fdcache_mbs;
+static long fdcache_prefetch;
 static string tmpdir;
 
 static void set_metric(const string& key, int64_t value);
@@ -476,6 +479,9 @@ parse_opt (int key, char *arg,
     case ARGP_KEY_FDCACHE_MBS:
       fdcache_mbs = atol (arg);
       break;
+    case ARGP_KEY_FDCACHE_PREFETCH:
+      fdcache_prefetch = atol (arg);
+      break;
     case ARGP_KEY_ARG:
       source_paths.insert(string(arg));
       break;
@@ -975,14 +981,14 @@ class libarchive_fdcache
     string archive;
     string entry;
     string fd;
-    long fd_size_mb; // rounded up megabytes
+    double fd_size_mb; // slightly rounded up megabytes
   };
   deque<fdcache_entry> lru; // @head: most recently used
   long max_fds;
   long max_mbs;
 
 public:
-  void intern(const string& a, const string& b, string fd, off_t sz)
+  void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
       unique_lock<mutex> lock(fdcache_lock);
@@ -995,11 +1001,15 @@ class libarchive_fdcache
               break; // must not continue iterating
             }
         }
-      long mb = ((sz+1023)/1024+1023)/1024;
+      double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
-      lru.push_front(n);
+      if (front_p)
+        lru.push_front(n);
+      else
+        lru.push_back(n);
     if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+      obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
 
     this->limit(max_fds, max_mbs); // age cache if required
@@ -1022,6 +1032,17 @@ class libarchive_fdcache
     return -1;
   }
 
+  int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
+  {
+    unique_lock<mutex> lock(fdcache_lock);
+    for (auto i = lru.begin(); i < lru.end(); i++)
+      {
+        if (i->archive == a && i->entry == b)
+          return true;
+      }
+    return false;
+  }
+
   void clear(const string& a, const string& b)
   {
     unique_lock<mutex> lock(fdcache_lock);
@@ -1047,7 +1068,7 @@ class libarchive_fdcache
     this->max_mbs = maxmbs;
 
     long total_fd = 0;
-    long total_mb = 0;
+    double total_mb = 0.0;
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         // accumulate totals from most recently used one going backward
@@ -1117,6 +1138,7 @@ handle_buildid_r_match (int64_t b_mtime,
       return 0;
     }
 
+  // check for a match in the fdcache first
   int fd = fdcache.lookup(b_source0, b_source1);
   while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
     {
@@ -1152,6 +1174,7 @@ handle_buildid_r_match (int64_t b_mtime,
       // NB: see, we never go around the 'loop' more than once
     }
 
+  // no match ... grumble, must process the archive
   string archive_decoder = "/dev/null";
   string archive_extension = "";
   for (auto&& arch : scan_archives)
@@ -1196,8 +1219,19 @@ handle_buildid_r_match (int64_t b_mtime,
   if (rc != ARCHIVE_OK)
     throw archive_exception(a, "cannot open archive from pipe");
 
-  while(1) // parse cpio archive entries
+  // archive traversal is in three stages:
+  // 1) skip entries whose names do not match the requested one
+  // 2) extract the matching entry name (set r = result)
+  // 3) extract some number of prefetched entries (just into fdcache)
+  // 4) abort any further processing
+  struct MHD_Response* r = 0;                 // will set in stage 2
+  unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+
+  while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
     {
+      if (interrupted)
+        break;
+
       struct archive_entry *e;
       rc = archive_read_next_header (a, &e);
       if (rc != ARCHIVE_OK)
@@ -1229,18 +1263,32 @@ handle_buildid_r_match (int64_t b_mtime,
           throw archive_exception(a, "cannot extract file");
         }
 
+      if (r != 0) // stage 3
+        {
+          // NB: now we know we have a complete reusable file; make fdcache
+          // responsible for unlinking it later.
+          fdcache.intern(b_source0, not_source1,
+                         tmppath, archive_entry_size(e),
+                         false); // prefetched ones go to back of lru
+          prefetch_count --;
+          close (fd); // we're not saving this fd to make a mhd-response from!
+          continue;
+        }
+
       // NB: now we know we have a complete reusable file; make fdcache
       // responsible for unlinking it later.
-      fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+      fdcache.intern(b_source0, b_source1,
+                     tmppath, archive_entry_size(e),
+                     true); // requested ones go to the front of lru
 
       inc_metric ("http_responses_total","result",archive_extension + " archive");
-      struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
+      r = MHD_create_response_from_fd (archive_entry_size(e), fd);
       if (r == 0)
         {
           if (verbose)
             obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
           close(fd);
-          break; // assume no chance of better luck around another iteration
+          break; // assume no chance of better luck around another iteration; no other copies of same file
         }
       else
         {
@@ -1251,12 +1299,12 @@ handle_buildid_r_match (int64_t b_mtime,
           /* libmicrohttpd will close it. */
           if (result_fd)
             *result_fd = fd;
-          return r;
+          continue;
         }
     }
 
   // XXX: rpm/file not found: delete this R entry?
-  return 0;
+  return r;
 }
 
 
@@ -2809,7 +2857,8 @@ main (int argc, char *argv[])
     fdcache_mbs = 1024; // 1 gigabyte
   else
     fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
-  fdcache_fds = concurrency * 2;
+  fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
+  fdcache_fds = (concurrency + fdcache_prefetch) * 2;
 
   /* Parse and process arguments.  */
   int remaining;
@@ -2943,6 +2992,7 @@ main (int argc, char *argv[])
   obatched(clog) << "rescan time " << rescan_s << endl;
   obatched(clog) << "fdcache fds " << fdcache_fds << endl;
   obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
+  obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
   obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
   obatched(clog) << "groom time " << groom_s << endl;
   if (scan_archives.size()>0)
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index ca844aedcfdc..ad4b2d993c8a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -193,14 +193,17 @@ loops in the symbolic directory tree might lead to \fIinfinite
 traversal\fP.
 
 .TP
-.B "\-\-fdcache-fds=NUM"  "\-\-fdcache-mbs=MB"
+.B "\-\-fdcache\-fds=NUM"  "\-\-fdcache\-mbs=MB"  "\-\-fdcache-prefetch=NUM2"
 Configure limits on a cache that keeps recently extracted files from
-archives.  Up to NUM files and up to a total of MB megabytes will be
-kept extracted, in order to avoid having to decompress their archives
-again.  The default NUM and MB values depend on the concurrency of the
-system, and on the available disk space on the $TMPDIR or \fB/tmp\fP
-filesystem.  This is because that is where the most recently used
-extracted files are kept.  Grooming cleans this cache.
+archives.  Up to NUM requested files and up to a total of MB megabytes
+will be kept extracted, in order to avoid having to decompress their
+archives over and over again.  In addition, up to NUM2 other files
+from an archive may be prefetched into the cache before they are even
+requested.  The default NUM, NUM2, and MB values depend on the
+concurrency of the system, and on the available disk space on the
+$TMPDIR or \fB/tmp\fP filesystem.  This is because that is where the
+most recently used extracted files are kept.  Grooming cleans this
+cache.
 
 .TP
 .B "\-v"

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: patch to commit soon: PR25375 debuginfod prefetching
  2020-02-24 22:29 patch to commit soon: PR25375 debuginfod prefetching Frank Ch. Eigler
@ 2020-02-25 11:32 ` Mark Wielaard
  2020-02-25 14:49   ` Frank Ch. Eigler
  0 siblings, 1 reply; 3+ messages in thread
From: Mark Wielaard @ 2020-02-25 11:32 UTC (permalink / raw)
  To: Frank Ch. Eigler, elfutils-devel

Hi Frank,

On Mon, 2020-02-24 at 17:29 -0500, Frank Ch. Eigler wrote:
> This patch has been baking on my public servers awhile and can make a
> huge difference in performance.  It's not something immediately
> obvious how or whether to test, as it's a pure performance improvement.
> Planning to push shortly.

As far as I understand it, it looks good.
One tiny question:

> @@ -2809,7 +2857,8 @@ main (int argc, char *argv[])
>      fdcache_mbs = 1024; // 1 gigabyte
>    else
>      fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
> -  fdcache_fds = concurrency * 2;
> +  fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
> +  fdcache_fds = (concurrency + fdcache_prefetch) * 2;

Here fdcache_prefetch is set and used before argp_parse () is called,
which would set it to the user supplied value (if any). Is that
intentional?

Cheers,

Mark

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: patch to commit soon: PR25375 debuginfod prefetching
  2020-02-25 11:32 ` Mark Wielaard
@ 2020-02-25 14:49   ` Frank Ch. Eigler
  0 siblings, 0 replies; 3+ messages in thread
From: Frank Ch. Eigler @ 2020-02-25 14:49 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel

Hi - 

> > +  fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
> > +  fdcache_fds = (concurrency + fdcache_prefetch) * 2;
> 
> Here fdcache_prefetch is set and used before argp_parse () is called,
> which would set it to the user supplied value (if any). Is that
> intentional?

Yeah, the idea is to provide some reasonable defaults, and then allow
the command line options to override them.  In this case, the user
might want to override both.  It'll be specific to the site anyway,
depending on distribution of rpm sizes vs. available storage.

- FChE

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2020-02-25 14:49 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-24 22:29 patch to commit soon: PR25375 debuginfod prefetching Frank Ch. Eigler
2020-02-25 11:32 ` Mark Wielaard
2020-02-25 14:49   ` Frank Ch. Eigler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).