* [PATCH] PR25756 more debuginfod metrics
@ 2020-10-21 0:44 Frank Ch. Eigler
2020-10-21 12:30 ` Mark Wielaard
0 siblings, 1 reply; 4+ messages in thread
From: Frank Ch. Eigler @ 2020-10-21 0:44 UTC (permalink / raw)
To: elfutils-devel
Subject: [PATCH 1/2] PR26756: add more prometheus metrics to debuginfod
From: Frank Ch. Eigler <fche@redhat.com>
Add an error_count{} family of metrics for each libc/libarchive/http
exception instance created during operation. Add a family of fdcache*
metrics for tracking fdcache operations and status. Test via a
injecting a permission-000 empty nothing.rpm in the testsuite.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
---
debuginfod/ChangeLog | 6 ++++
debuginfod/debuginfod.cxx | 56 +++++++++++++++++++++++++++---------
tests/ChangeLog | 6 ++++
tests/run-debuginfod-find.sh | 12 ++++++--
4 files changed, 63 insertions(+), 17 deletions(-)
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 8cb89967e9d1..236e2ca3270f 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26756: more prometheus metrics
+ * debuginfod.cxx (*_exception): Add counters for error occurrences.
+ (fdcache::*): Add counters for fdcache operations and status.
+
2020-09-18 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (scan_source_file, archive_classify): Store only
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 140b7789de3b..a5334d2a7fc5 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception
struct libc_exception: public reportable_exception
{
libc_exception(int rc, const string& msg):
- reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+ reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ inc_metric("error_count","libc",strerror(rc));
+ }
};
struct archive_exception: public reportable_exception
{
archive_exception(const string& msg):
- reportable_exception(string("libarchive error: ") + msg) {}
+ reportable_exception(string("libarchive error: ") + msg) {
+ inc_metric("error_count","libarchive",msg);
+ }
archive_exception(struct archive* a, const string& msg):
- reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+ reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+ inc_metric("error_count","libarchive",msg);
+ }
};
struct elfutils_exception: public reportable_exception
{
elfutils_exception(int rc, const string& msg):
- reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+ reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+ inc_metric("error_count","elfutils",elf_errmsg(rc));
+ }
};
@@ -1083,6 +1091,15 @@ class libarchive_fdcache
long max_mbs;
public:
+ void set_metrics()
+ {
+ double total_mb = 0.0;
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ total_mb += i->fd_size_mb;
+ set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+ set_metric("fdcache_count", lru.size());
+ }
+
void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
{
{
@@ -1093,19 +1110,24 @@ class libarchive_fdcache
{
unlink (i->fd.c_str());
lru.erase(i);
+ inc_metric("fdcache_op_count","op","dequeue");
break; // must not continue iterating
}
}
double mb = (sz+65535)/1048576.0; // round up to 64K block
fdcache_entry n = { a, b, fd, mb };
- if (front_p)
+ if (front_p) {
+ inc_metric("fdcache_op_count","op","enqueue_front");
lru.push_front(n);
- else
+ } else {
+ inc_metric("fdcache_op_count","op","enqueue_back");
lru.push_back(n);
- if (verbose > 3)
- obatched(clog) << "fdcache interned a=" << a << " b=" << b
- << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+ }
+ if (verbose > 3)
+ obatched(clog) << "fdcache interned a=" << a << " b=" << b
+ << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
}
+ set_metrics();
// NB: we age the cache at lookup time too
if (front_p)
@@ -1124,7 +1146,7 @@ class libarchive_fdcache
fdcache_entry n = *i;
lru.erase(i); // invalidates i, so no more iteration!
lru.push_front(n);
-
+ inc_metric("fdcache_op_count","op","requeue_front");
fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
break;
}
@@ -1142,9 +1164,12 @@ class libarchive_fdcache
unique_lock<mutex> lock(fdcache_lock);
for (auto i = lru.begin(); i < lru.end(); i++)
{
- if (i->archive == a && i->entry == b)
+ if (i->archive == a && i->entry == b) {
+ inc_metric("fdcache_op_count","op","probe_hit");
return true;
+ }
}
+ inc_metric("fdcache_op_count","op","probe_miss");
return false;
}
@@ -1157,7 +1182,9 @@ class libarchive_fdcache
{ // found it; move it to head of lru
fdcache_entry n = *i;
lru.erase(i); // invalidates i, so no more iteration!
+ inc_metric("fdcache_op_count","op","clear");
unlink (n.fd.c_str());
+ set_metrics();
return;
}
}
@@ -1188,14 +1215,15 @@ class libarchive_fdcache
if (verbose > 3)
obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
<< " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+ inc_metric("fdcache_op_count","op","evict");
unlink (j->fd.c_str());
}
lru.erase(i, lru.end()); // erase the nodes generally
break;
}
-
}
+ set_metrics();
}
~libarchive_fdcache()
@@ -2263,7 +2291,7 @@ scan_source_file (const string& rps, const stat_t& st,
if (verbose > 3)
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
-
+
ps_upsert_files
.reset()
.bind(1, dwarfsrc_canon)
@@ -2435,7 +2463,7 @@ archive_classify (const string& rps, string& archive_extension,
if (verbose > 3)
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
-
+
ps_upsert_files
.reset()
.bind(1, dwarfsrc_canon)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 5a8b5899b9c5..aa68ffd383e8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26756: more prometheus metrics
+ * run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
+ and check for new metrics.
+
2020-09-18 Mark Wielaard <mark@klomp.org>
* run-readelf-compressed-zstd.sh: New test.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 730bb0e10b47..79976f70dc92 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -95,6 +95,10 @@ wait_ready()
fi
}
+# create a 000 empty .rpm file to evoke a metric-visible error
+touch R/nothing.rpm
+chmod 000 R/nothing.rpm
+
env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
PID1=$!
tempfiles vlog4
@@ -240,8 +244,8 @@ fi
cp -rvp ${abs_srcdir}/debuginfod-tars Z
kill -USR1 $PID1
-# All rpms need to be in the index
-rpms=$(find R -name \*rpm | wc -l)
+# All rpms need to be in the index, except the dummy permission-000 one
+rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms
txz=$(find Z -name \*tar.xz | wc -l)
wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz
@@ -255,7 +259,7 @@ kill -USR1 $PID1 # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr
mkdir extracted
cd extracted
subdir=0;
-newrpms=$(find ../R -name \*\.rpm)
+newrpms=$(find ../R -name \*\.rpm | grep -v nothing)
for i in $newrpms; do
subdir=$[$subdir+1];
mkdir $subdir;
@@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum'
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count'
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count'
# And generate a few errors into the second debuginfod's logs, for analysis just below
curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true
--
2.26.2
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] PR25756 more debuginfod metrics
2020-10-21 0:44 [PATCH] PR25756 more debuginfod metrics Frank Ch. Eigler
@ 2020-10-21 12:30 ` Mark Wielaard
2020-10-21 14:01 ` Frank Ch. Eigler
0 siblings, 1 reply; 4+ messages in thread
From: Mark Wielaard @ 2020-10-21 12:30 UTC (permalink / raw)
To: Frank Ch. Eigler, elfutils-devel
Hi Frank,
On Tue, 2020-10-20 at 20:44 -0400, Frank Ch. Eigler via Elfutils-devel wrote:
> Subject: [PATCH 1/2] PR26756: add more prometheus metrics to debuginfod
>
> From: Frank Ch. Eigler <fche@redhat.com>
>
> Add an error_count{} family of metrics for each libc/libarchive/http
> exception instance created during operation. Add a family of fdcache*
> metrics for tracking fdcache operations and status. Test via a
> injecting a permission-000 empty nothing.rpm in the testsuite.
I found this somewhat hard to review since the indenting isn't
following the (GNU) style used in the rest of the file. Please place
curly brackets on their own line when possible.
> Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
> ---
> debuginfod/ChangeLog | 6 ++++
> debuginfod/debuginfod.cxx | 56 +++++++++++++++++++++++++++-------
> --
> tests/ChangeLog | 6 ++++
> tests/run-debuginfod-find.sh | 12 ++++++--
> 4 files changed, 63 insertions(+), 17 deletions(-)
>
> diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
> index 8cb89967e9d1..236e2ca3270f 100644
> --- a/debuginfod/ChangeLog
> +++ b/debuginfod/ChangeLog
> @@ -1,3 +1,9 @@
> +2020-10-20 Frank Ch. Eigler <fche@redhat.com>
> +
> + PR26756: more prometheus metrics
> + * debuginfod.cxx (*_exception): Add counters for error occurrences.
> + (fdcache::*): Add counters for fdcache operations and status.
Also a new public set_metrics() function.
In general these changes look correct.
Unfortunately the test crashes on my RHEL7 with DTS9 system:
Program terminated with signal SIGSEGV, Segmentation fault.
#0 0x000000000040dd58 in std::string::size (this=0xe63910)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:6226
6226 operator<(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
(gdb) where
#0 0x000000000040dd58 in std::string::size (this=0xe63910)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:6226
#1 std::string::compare (__str="fdcache_op_count{op=\"evict\"}",
this=0xe63910)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:5753
#2 std::operator< <char, std::char_traits<char>, std::allocator<char> > (
__rhs="fdcache_op_count{op=\"evict\"}",
__lhs=<error reading variable: Cannot access memory at address 0x225c>)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:6229
#3 std::less<std::string>::operator() (this=<optimized out>,
__y="fdcache_op_count{op=\"evict\"}",
__x=<error reading variable: Cannot access memory at address 0x225c>)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_function.h:386
#4 std::_Rb_tree<std::string, std::pair<std::string const, long>, std::_Select1st<std::pair<std::string const, long> >, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::_M_lower_bound (this=<optimized out>,
__k="fdcache_op_count{op=\"evict\"}", __y=0x7f922000ff00, __x=0xe638f0)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:1929
#5 std::_Rb_tree<std::string, std::pair<std::string const, long>, std::_Select1st<std::pair<std::string const, long> >, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::lower_bound (
__k="fdcache_op_count{op=\"evict\"}", this=0x435aa0 <metrics>)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:1282
#6 std::map<std::string, long, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::lower_bound (
__x="fdcache_op_count{op=\"evict\"}", this=0x435aa0 <metrics>)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_map.h:1258
#7 std::map<std::string, long, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::operator[] (
__k="fdcache_op_count{op=\"evict\"}", this=0x435aa0 <metrics>)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_map.h:495
#8 inc_metric (metric=..., lname=..., lvalue=...)
at /home/mark/src/elfutils/debuginfod/debuginfod.cxx:1716
#9 0x0000000000421db5 in libarchive_fdcache::limit (
this=this@entry=0x435ae0 <fdcache>, maxfds=maxfds@entry=0,
maxmbs=maxmbs@entry=0)
at /opt/rh/devtoolset-9/root/usr/include/c++/9/ext/new_allocator.h:80
#10 0x0000000000422760 in libarchive_fdcache::~libarchive_fdcache (
this=0x435ae0 <fdcache>, __in_chrg=<optimized out>)
at /home/mark/src/elfutils/debuginfod/debuginfod.cxx:1231
#11 0x00007f924ecb7ce9 in __run_exit_handlers (status=0,
listp=0x7f924f0456c8 <__exit_funcs>,
run_list_atexit=run_list_atexit@entry=true) at exit.c:77
#12 0x00007f924ecb7d37 in __GI_exit (status=<optimized out>) at exit.c:99
#13 0x00007f924eca055c in __libc_start_main (
main=0x40ad40 <main(int, char**)>, argc=10, argv=0x7ffc8ebaf6a8,
init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>,
stack_end=0x7ffc8ebaf698) at ../csu/libc-start.c:300
#14 0x000000000040c3e5 in _start ()
at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:211
Which seems to be caused by the libarchive_fdcache destructor calling
limit (0,0) triggering the new inc_metric which somehow cannot use the
metrics map?
> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index 5a8b5899b9c5..aa68ffd383e8 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,9 @@
> +2020-10-20 Frank Ch. Eigler <fche@redhat.com>
> +
> + PR26756: more prometheus metrics
> + * run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
> + and check for new metrics.
> +
> 2020-09-18 Mark Wielaard <mark@klomp.org>
>
> * run-readelf-compressed-zstd.sh: New test.
> diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
> index 730bb0e10b47..79976f70dc92 100755
> --- a/tests/run-debuginfod-find.sh
> +++ b/tests/run-debuginfod-find.sh
> @@ -95,6 +95,10 @@ wait_ready()
> fi
> }
>
> +# create a 000 empty .rpm file to evoke a metric-visible error
> +touch R/nothing.rpm
> +chmod 000 R/nothing.rpm
> +
Does this cause any issues during cleanup?
I would have expected something like:
if [ -f R/nothing.rpm ]; then chmod 644 R/nothing.rpm; fi
in cleanup(). But maybe the rm -f takes care of that?
Cheers,
Mark
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] PR25756 more debuginfod metrics
2020-10-21 12:30 ` Mark Wielaard
@ 2020-10-21 14:01 ` Frank Ch. Eigler
2020-10-21 19:22 ` [PATCH v2] " Frank Ch. Eigler
0 siblings, 1 reply; 4+ messages in thread
From: Frank Ch. Eigler @ 2020-10-21 14:01 UTC (permalink / raw)
To: Mark Wielaard; +Cc: elfutils-devel
Hi -
> I found this somewhat hard to review since the indenting isn't
> following the (GNU) style used in the rest of the file. Please place
> curly brackets on their own line when possible.
OK.
> > +2020-10-20 Frank Ch. Eigler <fche@redhat.com>
> > +
> > + PR26756: more prometheus metrics
> > + * debuginfod.cxx (*_exception): Add counters for error occurrences.
> > + (fdcache::*): Add counters for fdcache operations and status.
>
> Also a new public set_metrics() function.
OK.
> In general these changes look correct.
> Unfortunately the test crashes on my RHEL7 with DTS9 system:
> [...]
> #11 0x00007f924ecb7ce9 in __run_exit_handlers (status=0,
> listp=0x7f924f0456c8 <__exit_funcs>,
> run_list_atexit=run_list_atexit@entry=true) at exit.c:77
> #12 0x00007f924ecb7d37 in __GI_exit (status=<optimized out>) at exit.c:99
> #13 0x00007f924eca055c in __libc_start_main (
> main=0x40ad40 <main(int, char**)>, argc=10, argv=0x7ffc8ebaf6a8,
> init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>,
> stack_end=0x7ffc8ebaf698) at ../csu/libc-start.c:300
> #14 0x000000000040c3e5 in _start ()
> at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:211
> Which seems to be caused by the libarchive_fdcache destructor calling
> limit (0,0) triggering the new inc_metric which somehow cannot use the
> metrics map?
Ah it's a global variable destructor sequencing issue.
Hmm what a pain, ok will think of a solution.
> > +# create a 000 empty .rpm file to evoke a metric-visible error
> > +touch R/nothing.rpm
> > +chmod 000 R/nothing.rpm
> > +
>
> Does this cause any issues during cleanup?
> I would have expected something like:
>
> if [ -f R/nothing.rpm ]; then chmod 644 R/nothing.rpm; fi
>
> in cleanup(). But maybe the rm -f takes care of that?
There's a "rm -rf R" in cleanup, so is covered.
- FChE
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2] PR25756 more debuginfod metrics
2020-10-21 14:01 ` Frank Ch. Eigler
@ 2020-10-21 19:22 ` Frank Ch. Eigler
0 siblings, 0 replies; 4+ messages in thread
From: Frank Ch. Eigler @ 2020-10-21 19:22 UTC (permalink / raw)
To: Mark Wielaard, elfutils-devel
Hi -
PR26756: add more prometheus metrics to debuginfod
Add an error_count{} family of metrics for each libc/libarchive/http
exception instance created during operation. Add a family of fdcache*
metrics for tracking fdcache operations and status. Test via a
injecting a permission-000 empty nothing.rpm in the testsuite.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 688404e5d567..3341f1876f22 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,11 @@
+2020-10-20 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26756: more prometheus metrics
+ * debuginfod.cxx (*_exception): Add counters for error occurrences.
+ (fdcache::*): Add counters for fdcache operations and status.
+ (fdcache::set_metric): New fn for overall stat counts.
+ (fdcache::limit): ... allow metric-less use from dtors.
+
2020-10-20 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (handle_buildid*): Add a parameter for detecting
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 7907fecc7419..2b68ff1f052d 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception
struct libc_exception: public reportable_exception
{
libc_exception(int rc, const string& msg):
- reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+ reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+ inc_metric("error_count","libc",strerror(rc));
+ }
};
struct archive_exception: public reportable_exception
{
archive_exception(const string& msg):
- reportable_exception(string("libarchive error: ") + msg) {}
+ reportable_exception(string("libarchive error: ") + msg) {
+ inc_metric("error_count","libarchive",msg);
+ }
archive_exception(struct archive* a, const string& msg):
- reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+ reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+ inc_metric("error_count","libarchive",msg);
+ }
};
struct elfutils_exception: public reportable_exception
{
elfutils_exception(int rc, const string& msg):
- reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+ reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+ inc_metric("error_count","elfutils",elf_errmsg(rc));
+ }
};
@@ -1085,6 +1093,15 @@ class libarchive_fdcache
long max_mbs;
public:
+ void set_metrics()
+ {
+ double total_mb = 0.0;
+ for (auto i = lru.begin(); i < lru.end(); i++)
+ total_mb += i->fd_size_mb;
+ set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+ set_metric("fdcache_count", lru.size());
+ }
+
void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
{
{
@@ -1095,19 +1112,27 @@ class libarchive_fdcache
{
unlink (i->fd.c_str());
lru.erase(i);
+ inc_metric("fdcache_op_count","op","dequeue");
break; // must not continue iterating
}
}
double mb = (sz+65535)/1048576.0; // round up to 64K block
fdcache_entry n = { a, b, fd, mb };
if (front_p)
- lru.push_front(n);
+ {
+ inc_metric("fdcache_op_count","op","enqueue_front");
+ lru.push_front(n);
+ }
else
- lru.push_back(n);
- if (verbose > 3)
- obatched(clog) << "fdcache interned a=" << a << " b=" << b
- << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+ {
+ inc_metric("fdcache_op_count","op","enqueue_back");
+ lru.push_back(n);
+ }
+ if (verbose > 3)
+ obatched(clog) << "fdcache interned a=" << a << " b=" << b
+ << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
}
+ set_metrics();
// NB: we age the cache at lookup time too
if (front_p)
@@ -1126,7 +1151,7 @@ class libarchive_fdcache
fdcache_entry n = *i;
lru.erase(i); // invalidates i, so no more iteration!
lru.push_front(n);
-
+ inc_metric("fdcache_op_count","op","requeue_front");
fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
break;
}
@@ -1145,8 +1170,12 @@ class libarchive_fdcache
for (auto i = lru.begin(); i < lru.end(); i++)
{
if (i->archive == a && i->entry == b)
- return true;
+ {
+ inc_metric("fdcache_op_count","op","probe_hit");
+ return true;
+ }
}
+ inc_metric("fdcache_op_count","op","probe_miss");
return false;
}
@@ -1159,13 +1188,15 @@ class libarchive_fdcache
{ // found it; move it to head of lru
fdcache_entry n = *i;
lru.erase(i); // invalidates i, so no more iteration!
+ inc_metric("fdcache_op_count","op","clear");
unlink (n.fd.c_str());
+ set_metrics();
return;
}
}
}
- void limit(long maxfds, long maxmbs)
+ void limit(long maxfds, long maxmbs, bool metrics_p = true)
{
if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
@@ -1190,19 +1221,23 @@ class libarchive_fdcache
if (verbose > 3)
obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
<< " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+ if (metrics_p)
+ inc_metric("fdcache_op_count","op","evict");
unlink (j->fd.c_str());
}
lru.erase(i, lru.end()); // erase the nodes generally
break;
}
-
}
+ if (metrics_p) set_metrics();
}
~libarchive_fdcache()
{
- limit(0, 0);
+ // unlink any fdcache entries in $TMPDIR
+ // don't update metrics; those globals may be already destroyed
+ limit(0, 0, false);
}
};
static libarchive_fdcache fdcache;
@@ -1638,6 +1673,8 @@ handle_buildid (MHD_Connection* conn,
static map<string,int64_t> metrics; // arbitrary data for /metrics query
// NB: store int64_t since all our metrics are integers; prometheus accepts double
static mutex metrics_lock;
+// NB: these objects get released during the process exit via global dtors
+// do not call them from within other global dtors
// utility function for assembling prometheus-compatible
// name="escaped-value" strings
@@ -2269,7 +2306,7 @@ scan_source_file (const string& rps, const stat_t& st,
if (verbose > 3)
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
-
+
ps_upsert_files
.reset()
.bind(1, dwarfsrc_canon)
@@ -2441,7 +2478,7 @@ archive_classify (const string& rps, string& archive_extension,
if (verbose > 3)
obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
}
-
+
ps_upsert_files
.reset()
.bind(1, dwarfsrc_canon)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 5a8b5899b9c5..aa68ffd383e8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20 Frank Ch. Eigler <fche@redhat.com>
+
+ PR26756: more prometheus metrics
+ * run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
+ and check for new metrics.
+
2020-09-18 Mark Wielaard <mark@klomp.org>
* run-readelf-compressed-zstd.sh: New test.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 730bb0e10b47..79976f70dc92 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -95,6 +95,10 @@ wait_ready()
fi
}
+# create a 000 empty .rpm file to evoke a metric-visible error
+touch R/nothing.rpm
+chmod 000 R/nothing.rpm
+
env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
PID1=$!
tempfiles vlog4
@@ -240,8 +244,8 @@ fi
cp -rvp ${abs_srcdir}/debuginfod-tars Z
kill -USR1 $PID1
-# All rpms need to be in the index
-rpms=$(find R -name \*rpm | wc -l)
+# All rpms need to be in the index, except the dummy permission-000 one
+rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms
txz=$(find Z -name \*tar.xz | wc -l)
wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz
@@ -255,7 +259,7 @@ kill -USR1 $PID1 # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr
mkdir extracted
cd extracted
subdir=0;
-newrpms=$(find ../R -name \*\.rpm)
+newrpms=$(find ../R -name \*\.rpm | grep -v nothing)
for i in $newrpms; do
subdir=$[$subdir+1];
mkdir $subdir;
@@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum'
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count'
curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count'
# And generate a few errors into the second debuginfod's logs, for analysis just below
curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-10-21 19:22 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-21 0:44 [PATCH] PR25756 more debuginfod metrics Frank Ch. Eigler
2020-10-21 12:30 ` Mark Wielaard
2020-10-21 14:01 ` Frank Ch. Eigler
2020-10-21 19:22 ` [PATCH v2] " Frank Ch. Eigler
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).