* patch to commit soon: PR25375 debuginfod prefetching
@ 2020-02-24 22:29 Frank Ch. Eigler
2020-02-25 11:32 ` Mark Wielaard
0 siblings, 1 reply; 3+ messages in thread
From: Frank Ch. Eigler @ 2020-02-24 22:29 UTC (permalink / raw)
To: elfutils-devel
Hi -
This patch has been baking on my public servers awhile and can make a
huge difference in performance. It's not something immediately
obvious how or whether to test, as it's a pure performance improvement.
Planning to push shortly.
commit ae8b89716116a6df124f8700f77460b5e97c12c4 (origin/fche/pr25375)
Author: Frank Ch. Eigler <fche@redhat.com>
Date: Sat Jan 25 18:43:07 2020 -0500
PR25375: fdcache prefetching to reduce repeated archive decompression
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 0acd70e4a916..7f432d7d9c41 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -355,6 +355,8 @@ static const struct argp_option options[] =
{ "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MBS 0x1002
{ "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 },
+#define ARGP_KEY_FDCACHE_PREFETCH 0x1003
+ { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 }
};
@@ -394,6 +396,7 @@ static regex_t file_exclude_regex;
static bool traverse_logical;
static long fdcache_fds;
static long fdcache_mbs;
+static long fdcache_prefetch;
static string tmpdir;
static void set_metric(const string& key, int64_t value);
@@ -476,6 +479,9 @@ parse_opt (int key, char *arg,
case ARGP_KEY_FDCACHE_MBS:
fdcache_mbs = atol (arg);
break;
+ case ARGP_KEY_FDCACHE_PREFETCH:
+ fdcache_prefetch = atol (arg);
+ break;
case ARGP_KEY_ARG:
source_paths.insert(string(arg));
break;
@@ -975,14 +981,14 @@ class libarchive_fdcache
string archive;
string entry;
string fd;
- long fd_size_mb; // rounded up megabytes
+ double fd_size_mb; // slightly rounded up megabytes
};
deque<fdcache_entry> lru; // @head: most recently used
long max_fds;
long max_mbs;
public:
- void intern(const string& a, const string& b, string fd, off_t sz)
+ void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
{
{
unique_lock<mutex> lock(fdcache_lock);
@@ -995,11 +1001,15 @@ class libarchive_fdcache
break; // must not continue iterating
}
}
- long mb = ((sz+1023)/1024+1023)/1024;
+ double mb = (sz+65535)/1048576.0; // round up to 64K block
fdcache_entry n = { a, b, fd, mb };
- lru.push_front(n);
+ if (front_p)
+ lru.push_front(n);
+ else
+ lru.push_back(n);
if (verbose > 3)
- obatched(clog) << "fdcache interned a=" << a << " b=" << b << " fd=" << fd << " mb=" << mb << endl;
+ obatched(clog) << "fdcache interned a=" << a << " b=" << b
+ << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
}
this->limit(max_fds, max_mbs); // age cache if required
@@ -1022,6 +1032,17 @@ class libarchive_fdcache
return -1;
}
+ int probe(const string& a, const string& b) // just a cache residency check - don't modify LRU state, don't open
+ {
+ unique_lock<mutex> lock(fdcache_lock);
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ {
+ if (i->archive == a && i->entry == b)
+ return true;
+ }
+ return false;
+ }
+
void clear(const string& a, const string& b)
{
unique_lock<mutex> lock(fdcache_lock);
@@ -1047,7 +1068,7 @@ class libarchive_fdcache
this->max_mbs = maxmbs;
long total_fd = 0;
- long total_mb = 0;
+ double total_mb = 0.0;
for (auto i = lru.begin(); i < lru.end(); i++)
{
// accumulate totals from most recently used one going backward
@@ -1117,6 +1138,7 @@ handle_buildid_r_match (int64_t b_mtime,
return 0;
}
+ // check for a match in the fdcache first
int fd = fdcache.lookup(b_source0, b_source1);
while (fd >= 0) // got one!; NB: this is really an if() with a possible branch out to the end
{
@@ -1152,6 +1174,7 @@ handle_buildid_r_match (int64_t b_mtime,
// NB: see, we never go around the 'loop' more than once
}
+ // no match ... grumble, must process the archive
string archive_decoder = "/dev/null";
string archive_extension = "";
for (auto&& arch : scan_archives)
@@ -1196,8 +1219,19 @@ handle_buildid_r_match (int64_t b_mtime,
if (rc != ARCHIVE_OK)
throw archive_exception(a, "cannot open archive from pipe");
- while(1) // parse cpio archive entries
+ // archive traversal is in three stages:
+ // 1) skip entries whose names do not match the requested one
+ // 2) extract the matching entry name (set r = result)
+ // 3) extract some number of prefetched entries (just into fdcache)
+ // 4) abort any further processing
+ struct MHD_Response* r = 0; // will set in stage 2
+ unsigned prefetch_count = fdcache_prefetch; // will decrement in stage 3
+
+ while(r == 0 || prefetch_count > 0) // stage 1, 2, or 3
{
+ if (interrupted)
+ break;
+
struct archive_entry *e;
rc = archive_read_next_header (a, &e);
if (rc != ARCHIVE_OK)
@@ -1229,18 +1263,32 @@ handle_buildid_r_match (int64_t b_mtime,
throw archive_exception(a, "cannot extract file");
}
+ if (r != 0) // stage 3
+ {
+ // NB: now we know we have a complete reusable file; make fdcache
+ // responsible for unlinking it later.
+ fdcache.intern(b_source0, not_source1,
+ tmppath, archive_entry_size(e),
+ false); // prefetched ones go to back of lru
+ prefetch_count --;
+ close (fd); // we're not saving this fd to make a mhd-response from!
+ continue;
+ }
+
// NB: now we know we have a complete reusable file; make fdcache
// responsible for unlinking it later.
- fdcache.intern(b_source0, b_source1, tmppath, archive_entry_size(e));
+ fdcache.intern(b_source0, b_source1,
+ tmppath, archive_entry_size(e),
+ true); // requested ones go to the front of lru
inc_metric ("http_responses_total","result",archive_extension + " archive");
- struct MHD_Response* r = MHD_create_response_from_fd (archive_entry_size(e), fd);
+ r = MHD_create_response_from_fd (archive_entry_size(e), fd);
if (r == 0)
{
if (verbose)
obatched(clog) << "cannot create fd-response for " << b_source0 << endl;
close(fd);
- break; // assume no chance of better luck around another iteration
+ break; // assume no chance of better luck around another iteration; no other copies of same file
}
else
{
@@ -1251,12 +1299,12 @@ handle_buildid_r_match (int64_t b_mtime,
/* libmicrohttpd will close it. */
if (result_fd)
*result_fd = fd;
- return r;
+ continue;
}
}
// XXX: rpm/file not found: delete this R entry?
- return 0;
+ return r;
}
@@ -2809,7 +2857,8 @@ main (int argc, char *argv[])
fdcache_mbs = 1024; // 1 gigabyte
else
fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
- fdcache_fds = concurrency * 2;
+ fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
+ fdcache_fds = (concurrency + fdcache_prefetch) * 2;
/* Parse and process arguments. */
int remaining;
@@ -2943,6 +2992,7 @@ main (int argc, char *argv[])
obatched(clog) << "rescan time " << rescan_s << endl;
obatched(clog) << "fdcache fds " << fdcache_fds << endl;
obatched(clog) << "fdcache mbs " << fdcache_mbs << endl;
+ obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl;
obatched(clog) << "fdcache tmpdir " << tmpdir << endl;
obatched(clog) << "groom time " << groom_s << endl;
if (scan_archives.size()>0)
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index ca844aedcfdc..ad4b2d993c8a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -193,14 +193,17 @@ loops in the symbolic directory tree might lead to \fIinfinite
traversal\fP.
.TP
-.B "\-\-fdcache-fds=NUM" "\-\-fdcache-mbs=MB"
+.B "\-\-fdcache\-fds=NUM" "\-\-fdcache\-mbs=MB" "\-\-fdcache-prefetch=NUM2"
Configure limits on a cache that keeps recently extracted files from
-archives. Up to NUM files and up to a total of MB megabytes will be
-kept extracted, in order to avoid having to decompress their archives
-again. The default NUM and MB values depend on the concurrency of the
-system, and on the available disk space on the $TMPDIR or \fB/tmp\fP
-filesystem. This is because that is where the most recently used
-extracted files are kept. Grooming cleans this cache.
+archives. Up to NUM requested files and up to a total of MB megabytes
+will be kept extracted, in order to avoid having to decompress their
+archives over and over again. In addition, up to NUM2 other files
+from an archive may be prefetched into the cache before they are even
+requested. The default NUM, NUM2, and MB values depend on the
+concurrency of the system, and on the available disk space on the
+$TMPDIR or \fB/tmp\fP filesystem. This is because that is where the
+most recently used extracted files are kept. Grooming cleans this
+cache.
.TP
.B "\-v"
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: patch to commit soon: PR25375 debuginfod prefetching
2020-02-24 22:29 patch to commit soon: PR25375 debuginfod prefetching Frank Ch. Eigler
@ 2020-02-25 11:32 ` Mark Wielaard
2020-02-25 14:49 ` Frank Ch. Eigler
0 siblings, 1 reply; 3+ messages in thread
From: Mark Wielaard @ 2020-02-25 11:32 UTC (permalink / raw)
To: Frank Ch. Eigler, elfutils-devel
Hi Frank,
On Mon, 2020-02-24 at 17:29 -0500, Frank Ch. Eigler wrote:
> This patch has been baking on my public servers awhile and can make a
> huge difference in performance. It's not something immediately
> obvious how or whether to test, as it's a pure performance improvement.
> Planning to push shortly.
As far as I understand it, it looks good.
One tiny question:
> @@ -2809,7 +2857,8 @@ main (int argc, char *argv[])
> fdcache_mbs = 1024; // 1 gigabyte
> else
> fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space
> - fdcache_fds = concurrency * 2;
> + fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
> + fdcache_fds = (concurrency + fdcache_prefetch) * 2;
Here fdcache_prefetch is set and used before argp_parse () is called,
which would set it to the user supplied value (if any). Is that
intentional?
Cheers,
Mark
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: patch to commit soon: PR25375 debuginfod prefetching
2020-02-25 11:32 ` Mark Wielaard
@ 2020-02-25 14:49 ` Frank Ch. Eigler
0 siblings, 0 replies; 3+ messages in thread
From: Frank Ch. Eigler @ 2020-02-25 14:49 UTC (permalink / raw)
To: Mark Wielaard; +Cc: elfutils-devel
Hi -
> > + fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression
> > + fdcache_fds = (concurrency + fdcache_prefetch) * 2;
>
> Here fdcache_prefetch is set and used before argp_parse () is called,
> which would set it to the user supplied value (if any). Is that
> intentional?
Yeah, the idea is to provide some reasonable defaults, and then allow
the command line options to override them. In this case, the user
might want to override both. It'll be specific to the site anyway,
depending on distribution of rpm sizes vs. available storage.
- FChE
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2020-02-25 14:49 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-02-24 22:29 patch to commit soon: PR25375 debuginfod prefetching Frank Ch. Eigler
2020-02-25 11:32 ` Mark Wielaard
2020-02-25 14:49 ` Frank Ch. Eigler
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).