From: "Frank Ch. Eigler" <fche@redhat.com>
To: Mark Wielaard <mark@klomp.org>, elfutils-devel@sourceware.org
Subject: Re: [PATCH v2] PR26756 more debuginfod metrics
Date: Wed, 21 Oct 2020 15:22:29 -0400 [thread overview]
Message-ID: <20201021192229.GD21776@redhat.com> (raw)
In-Reply-To: <20201021140137.GB21776@redhat.com>
Hi -
PR26756: add more prometheus metrics to debuginfod
Add an error_count{} family of metrics for each libc/libarchive/http
exception instance created during operation. Add a family of fdcache*
metrics for tracking fdcache operations and status. Test by
injecting a permission-000 empty nothing.rpm into the testsuite.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 688404e5d567..3341f1876f22 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,11 @@
+2020-10-20 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26756: more prometheus metrics
+ * debuginfod.cxx (*_exception): Add counters for error occurrences.
+ (fdcache::*): Add counters for fdcache operations and status.
+ (fdcache::set_metrics): New fn for overall stat counts.
+ (fdcache::limit): ... allow metric-less use from dtors.
+
2020-10-20 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (handle_buildid*): Add a parameter for detecting
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 7907fecc7419..2b68ff1f052d 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception
struct libc_exception: public reportable_exception
{
libc_exception(int rc, const string& msg):
- reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+ reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ inc_metric("error_count","libc",strerror(rc));
+ }
};
struct archive_exception: public reportable_exception
{
archive_exception(const string& msg):
- reportable_exception(string("libarchive error: ") + msg) {}
+ reportable_exception(string("libarchive error: ") + msg) {
+ inc_metric("error_count","libarchive",msg);
+ }
archive_exception(struct archive* a, const string& msg):
- reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+ reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+ inc_metric("error_count","libarchive",msg);
+ }
};
struct elfutils_exception: public reportable_exception
{
elfutils_exception(int rc, const string& msg):
- reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+ reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+ inc_metric("error_count","elfutils",elf_errmsg(rc));
+ }
};
@@ -1085,6 +1093,15 @@ class libarchive_fdcache
long max_mbs;
public:
+ void set_metrics()
+ {
+ double total_mb = 0.0;
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ total_mb += i->fd_size_mb;
+ set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+ set_metric("fdcache_count", lru.size());
+ }
+
void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
{
{
@@ -1095,19 +1112,27 @@ class libarchive_fdcache
{
unlink (i->fd.c_str());
lru.erase(i);
+ inc_metric("fdcache_op_count","op","dequeue");
break; // must not continue iterating
}
}
double mb = (sz+65535)/1048576.0; // round up to 64K block
fdcache_entry n = { a, b, fd, mb };
if (front_p)
- lru.push_front(n);
+ {
+ inc_metric("fdcache_op_count","op","enqueue_front");
+ lru.push_front(n);
+ }
else
- lru.push_back(n);
- if (verbose > 3)
- obatched(clog) << "fdcache interned a=" << a << " b=" << b
- << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+ {
+ inc_metric("fdcache_op_count","op","enqueue_back");
+ lru.push_back(n);
+ }
+ if (verbose > 3)
+ obatched(clog) << "fdcache interned a=" << a << " b=" << b
+ << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
}
+ set_metrics();
// NB: we age the cache at lookup time too
if (front_p)
@@ -1126,7 +1151,7 @@ class libarchive_fdcache
fdcache_entry n = *i;
lru.erase(i); // invalidates i, so no more iteration!
lru.push_front(n);
-
+ inc_metric("fdcache_op_count","op","requeue_front");
fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
break;
}
@@ -1145,8 +1170,12 @@ class libarchive_fdcache
for (auto i = lru.begin(); i < lru.end(); i++)
{
if (i->archive == a && i->entry == b)
- return true;
+ {
+ inc_metric("fdcache_op_count","op","probe_hit");
+ return true;
+ }
}
+ inc_metric("fdcache_op_count","op","probe_miss");
return false;
}
@@ -1159,13 +1188,15 @@ class libarchive_fdcache
{ // found it; move it to head of lru
fdcache_entry n = *i;
lru.erase(i); // invalidates i, so no more iteration!
+ inc_metric("fdcache_op_count","op","clear");
unlink (n.fd.c_str());
+ set_metrics();
return;
}
}
}
- void limit(long maxfds, long maxmbs)
+ void limit(long maxfds, long maxmbs, bool metrics_p = true)
{
if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
@@ -1190,19 +1221,23 @@ class libarchive_fdcache
if (verbose > 3)
obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
<< " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+ if (metrics_p)
+ inc_metric("fdcache_op_count","op","evict");
unlink (j->fd.c_str());
}
lru.erase(i, lru.end()); // erase the nodes generally
break;
}
-
}
+ if (metrics_p) set_metrics();
}
~libarchive_fdcache()
{
- limit(0, 0);
+ // unlink any fdcache entries in $TMPDIR
+ // don't update metrics; those globals may be already destroyed
+ limit(0, 0, false);
}
};
static libarchive_fdcache fdcache;
@@ -1638,6 +1673,8 @@ handle_buildid (MHD_Connection* conn,
static map<string,int64_t> metrics; // arbitrary data for /metrics query
// NB: store int64_t since all our metrics are integers; prometheus accepts double
static mutex metrics_lock;
+// NB: these objects get released during the process exit via global dtors
+// do not call them from within other global dtors
// utility function for assembling prometheus-compatible
// name="escaped-value" strings
@@ -2269,7 +2306,7 @@ scan_source_file (const string& rps, const stat_t& st,
if (verbose > 3)
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
-
+
ps_upsert_files
.reset()
.bind(1, dwarfsrc_canon)
@@ -2441,7 +2478,7 @@ archive_classify (const string& rps, string& archive_extension,
if (verbose > 3)
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
-
+
ps_upsert_files
.reset()
.bind(1, dwarfsrc_canon)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 5a8b5899b9c5..aa68ffd383e8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26756: more prometheus metrics
+ * run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
+ and check for new metrics.
+
2020-09-18 Mark Wielaard <mark@klomp.org>
* run-readelf-compressed-zstd.sh: New test.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 730bb0e10b47..79976f70dc92 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -95,6 +95,10 @@ wait_ready()
fi
}
+# create a 000 empty .rpm file to evoke a metric-visible error
+touch R/nothing.rpm
+chmod 000 R/nothing.rpm
+
env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
PID1=$!
tempfiles vlog4
@@ -240,8 +244,8 @@ fi
cp -rvp ${abs_srcdir}/debuginfod-tars Z
kill -USR1 $PID1
-# All rpms need to be in the index
-rpms=$(find R -name \*rpm | wc -l)
+# All rpms need to be in the index, except the dummy permission-000 one
+rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms
txz=$(find Z -name \*tar.xz | wc -l)
wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz
@@ -255,7 +259,7 @@ kill -USR1 $PID1 # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr
mkdir extracted
cd extracted
subdir=0;
-newrpms=$(find ../R -name \*\.rpm)
+newrpms=$(find ../R -name \*\.rpm | grep -v nothing)
for i in $newrpms; do
subdir=$[$subdir+1];
mkdir $subdir;
@@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum'
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count'
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count'
# And generate a few errors into the second debuginfod's logs, for analysis just below
curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true
prev parent reply other threads:[~2020-10-21 19:22 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-10-21 0:44 [PATCH] " Frank Ch. Eigler
2020-10-21 12:30 ` Mark Wielaard
2020-10-21 14:01 ` Frank Ch. Eigler
2020-10-21 19:22 ` Frank Ch. Eigler [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20201021192229.GD21776@redhat.com \
--to=fche@redhat.com \
--cc=elfutils-devel@sourceware.org \
--cc=mark@klomp.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).