public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] PR25756 more debuginfod metrics
@ 2020-10-21  0:44 Frank Ch. Eigler
  2020-10-21 12:30 ` Mark Wielaard
  0 siblings, 1 reply; 4+ messages in thread
From: Frank Ch. Eigler @ 2020-10-21  0:44 UTC (permalink / raw)
  To: elfutils-devel

Subject: [PATCH 1/2] PR26756: add more prometheus metrics to debuginfod

From: Frank Ch. Eigler <fche@redhat.com>

Add an error_count{} family of metrics for each libc/libarchive/http
exception instance created during operation.  Add a family of fdcache*
metrics for tracking fdcache operations and status.  Test via a
injecting a permission-000 empty nothing.rpm in the testsuite.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
---
 debuginfod/ChangeLog         |  6 ++++
 debuginfod/debuginfod.cxx    | 56 +++++++++++++++++++++++++++---------
 tests/ChangeLog              |  6 ++++
 tests/run-debuginfod-find.sh | 12 ++++++--
 4 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 8cb89967e9d1..236e2ca3270f 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26756: more prometheus metrics
+	* debuginfod.cxx (*_exception): Add counters for error occurrences.
+	(fdcache::*): Add counters for fdcache operations and status.
+
 2020-09-18  Frank Ch. Eigler <fche@redhat.com>
 
 	* debuginfod.cxx (scan_source_file, archive_classify): Store only
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 140b7789de3b..a5334d2a7fc5 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception
 struct libc_exception: public reportable_exception
 {
   libc_exception(int rc, const string& msg):
-    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+    inc_metric("error_count","libc",strerror(rc));
+  }
 };
 
 
 struct archive_exception: public reportable_exception
 {
   archive_exception(const string& msg):
-    reportable_exception(string("libarchive error: ") + msg) {}
+    reportable_exception(string("libarchive error: ") + msg) {
+      inc_metric("error_count","libarchive",msg);
+  }
   archive_exception(struct archive* a, const string& msg):
-    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+    inc_metric("error_count","libarchive",msg);
+  }
 };
 
 
 struct elfutils_exception: public reportable_exception
 {
   elfutils_exception(int rc, const string& msg):
-    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+    inc_metric("error_count","elfutils",elf_errmsg(rc));
+  }
 };
 
 
@@ -1083,6 +1091,15 @@ class libarchive_fdcache
   long max_mbs;
 
 public:
+  void set_metrics()
+  {
+    double total_mb = 0.0;
+    for (auto i = lru.begin(); i < lru.end(); i++)
+      total_mb += i->fd_size_mb;
+    set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+    set_metric("fdcache_count", lru.size());
+  }
+
   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
@@ -1093,19 +1110,24 @@ class libarchive_fdcache
             {
               unlink (i->fd.c_str());
               lru.erase(i);
+              inc_metric("fdcache_op_count","op","dequeue");
               break; // must not continue iterating
             }
         }
       double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
-      if (front_p)
+      if (front_p) {
+        inc_metric("fdcache_op_count","op","enqueue_front");
         lru.push_front(n);
-      else
+      } else {
+        inc_metric("fdcache_op_count","op","enqueue_back");
         lru.push_back(n);
-    if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b
-                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+      }
+      if (verbose > 3)
+        obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                       << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
+    set_metrics();
 
     // NB: we age the cache at lookup time too
     if (front_p)
@@ -1124,7 +1146,7 @@ class libarchive_fdcache
               fdcache_entry n = *i;
               lru.erase(i); // invalidates i, so no more iteration!
               lru.push_front(n);
-
+              inc_metric("fdcache_op_count","op","requeue_front");
               fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
               break;
             }
@@ -1142,9 +1164,12 @@ class libarchive_fdcache
     unique_lock<mutex> lock(fdcache_lock);
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
-        if (i->archive == a && i->entry == b)
+        if (i->archive == a && i->entry == b) {
+          inc_metric("fdcache_op_count","op","probe_hit");
           return true;
+        }
       }
+    inc_metric("fdcache_op_count","op","probe_miss");
     return false;
   }
 
@@ -1157,7 +1182,9 @@ class libarchive_fdcache
           { // found it; move it to head of lru
             fdcache_entry n = *i;
             lru.erase(i); // invalidates i, so no more iteration!
+            inc_metric("fdcache_op_count","op","clear");
             unlink (n.fd.c_str());
+            set_metrics();
             return;
           }
       }
@@ -1188,14 +1215,15 @@ class libarchive_fdcache
                 if (verbose > 3)
                   obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
                                  << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+                inc_metric("fdcache_op_count","op","evict");
                 unlink (j->fd.c_str());
               }
 
             lru.erase(i, lru.end()); // erase the nodes generally
             break;
           }
-
       }
+    set_metrics();
   }
 
   ~libarchive_fdcache()
@@ -2263,7 +2291,7 @@ scan_source_file (const string& rps, const stat_t& st,
               if (verbose > 3)
                 obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
             }
-          
+
           ps_upsert_files
             .reset()
             .bind(1, dwarfsrc_canon)
@@ -2435,7 +2463,7 @@ archive_classify (const string& rps, string& archive_extension,
                       if (verbose > 3)
                         obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
                     }
-                  
+
                   ps_upsert_files
                     .reset()
                     .bind(1, dwarfsrc_canon)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 5a8b5899b9c5..aa68ffd383e8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26756: more prometheus metrics
+	* run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
+	and check for new metrics.
+
 2020-09-18  Mark Wielaard  <mark@klomp.org>
 
 	* run-readelf-compressed-zstd.sh: New test.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 730bb0e10b47..79976f70dc92 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -95,6 +95,10 @@ wait_ready()
   fi
 }
 
+# create a 000 empty .rpm file to evoke a metric-visible error
+touch R/nothing.rpm
+chmod 000 R/nothing.rpm
+
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
 PID1=$!
 tempfiles vlog4
@@ -240,8 +244,8 @@ fi
 
 cp -rvp ${abs_srcdir}/debuginfod-tars Z
 kill -USR1 $PID1
-# All rpms need to be in the index
-rpms=$(find R -name \*rpm | wc -l)
+# All rpms need to be in the index, except the dummy permission-000 one
+rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
 wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms
 txz=$(find Z -name \*tar.xz | wc -l)
 wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz
@@ -255,7 +259,7 @@ kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr
 mkdir extracted
 cd extracted
 subdir=0;
-newrpms=$(find ../R -name \*\.rpm)
+newrpms=$(find ../R -name \*\.rpm | grep -v nothing)
 for i in $newrpms; do
     subdir=$[$subdir+1];
     mkdir $subdir;
@@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum'
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count'
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count'
 
 # And generate a few errors into the second debuginfod's logs, for analysis just below
 curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true
-- 
2.26.2


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] PR25756 more debuginfod metrics
  2020-10-21  0:44 [PATCH] PR25756 more debuginfod metrics Frank Ch. Eigler
@ 2020-10-21 12:30 ` Mark Wielaard
  2020-10-21 14:01   ` Frank Ch. Eigler
  0 siblings, 1 reply; 4+ messages in thread
From: Mark Wielaard @ 2020-10-21 12:30 UTC (permalink / raw)
  To: Frank Ch. Eigler, elfutils-devel

Hi Frank,

On Tue, 2020-10-20 at 20:44 -0400, Frank Ch. Eigler via Elfutils-devel wrote:
> Subject: [PATCH 1/2] PR26756: add more prometheus metrics to debuginfod
> 
> From: Frank Ch. Eigler <fche@redhat.com>
> 
> Add an error_count{} family of metrics for each libc/libarchive/http
> exception instance created during operation.  Add a family of fdcache*
> metrics for tracking fdcache operations and status.  Test via a
> injecting a permission-000 empty nothing.rpm in the testsuite.

I found this somewhat hard to review since the indenting isn't
following the (GNU) style used in the rest of the file. Please place
curly brackets on their own line when possible.

> Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
> ---
>  debuginfod/ChangeLog         |  6 ++++
>  debuginfod/debuginfod.cxx    | 56 +++++++++++++++++++++++++++-------
> --
>  tests/ChangeLog              |  6 ++++
>  tests/run-debuginfod-find.sh | 12 ++++++--
>  4 files changed, 63 insertions(+), 17 deletions(-)
> 
> diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
> index 8cb89967e9d1..236e2ca3270f 100644
> --- a/debuginfod/ChangeLog
> +++ b/debuginfod/ChangeLog
> @@ -1,3 +1,9 @@
> +2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
> +
> +	PR26756: more prometheus metrics
> +	* debuginfod.cxx (*_exception): Add counters for error occurrences.
> +	(fdcache::*): Add counters for fdcache operations and status.

Also a new public set_metrics() function.

In general these changes look correct.
Unfortunately the test crashes on my RHEL7 with DTS9 system:

Program terminated with signal SIGSEGV, Segmentation fault.
#0  0x000000000040dd58 in std::string::size (this=0xe63910)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:6226
6226	    operator<(const basic_string<_CharT, _Traits, _Alloc>& __lhs,
(gdb) where
#0  0x000000000040dd58 in std::string::size (this=0xe63910)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:6226
#1  std::string::compare (__str="fdcache_op_count{op=\"evict\"}", 
    this=0xe63910)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:5753
#2  std::operator< <char, std::char_traits<char>, std::allocator<char> > (
    __rhs="fdcache_op_count{op=\"evict\"}", 
    __lhs=<error reading variable: Cannot access memory at address 0x225c>)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/basic_string.h:6229
#3  std::less<std::string>::operator() (this=<optimized out>, 
    __y="fdcache_op_count{op=\"evict\"}", 
    __x=<error reading variable: Cannot access memory at address 0x225c>)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_function.h:386
#4  std::_Rb_tree<std::string, std::pair<std::string const, long>, std::_Select1st<std::pair<std::string const, long> >, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::_M_lower_bound (this=<optimized out>, 
    __k="fdcache_op_count{op=\"evict\"}", __y=0x7f922000ff00, __x=0xe638f0)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:1929
#5  std::_Rb_tree<std::string, std::pair<std::string const, long>, std::_Select1st<std::pair<std::string const, long> >, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::lower_bound (
    __k="fdcache_op_count{op=\"evict\"}", this=0x435aa0 <metrics>)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:1282
#6  std::map<std::string, long, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::lower_bound (
    __x="fdcache_op_count{op=\"evict\"}", this=0x435aa0 <metrics>)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_map.h:1258
#7  std::map<std::string, long, std::less<std::string>, std::allocator<std::pair<std::string const, long> > >::operator[] (
    __k="fdcache_op_count{op=\"evict\"}", this=0x435aa0 <metrics>)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_map.h:495
#8  inc_metric (metric=..., lname=..., lvalue=...)
    at /home/mark/src/elfutils/debuginfod/debuginfod.cxx:1716
#9  0x0000000000421db5 in libarchive_fdcache::limit (
    this=this@entry=0x435ae0 <fdcache>, maxfds=maxfds@entry=0, 
    maxmbs=maxmbs@entry=0)
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/ext/new_allocator.h:80
#10 0x0000000000422760 in libarchive_fdcache::~libarchive_fdcache (
    this=0x435ae0 <fdcache>, __in_chrg=<optimized out>)
    at /home/mark/src/elfutils/debuginfod/debuginfod.cxx:1231
#11 0x00007f924ecb7ce9 in __run_exit_handlers (status=0, 
    listp=0x7f924f0456c8 <__exit_funcs>, 
    run_list_atexit=run_list_atexit@entry=true) at exit.c:77
#12 0x00007f924ecb7d37 in __GI_exit (status=<optimized out>) at exit.c:99
#13 0x00007f924eca055c in __libc_start_main (
    main=0x40ad40 <main(int, char**)>, argc=10, argv=0x7ffc8ebaf6a8, 
    init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, 
    stack_end=0x7ffc8ebaf698) at ../csu/libc-start.c:300
#14 0x000000000040c3e5 in _start ()
    at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:211

Which seems to be caused by the libarchive_fdcache destructor calling
limit (0,0) triggering the new inc_metric which somehow cannot use the
metrics map?

> diff --git a/tests/ChangeLog b/tests/ChangeLog
> index 5a8b5899b9c5..aa68ffd383e8 100644
> --- a/tests/ChangeLog
> +++ b/tests/ChangeLog
> @@ -1,3 +1,9 @@
> +2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
> +
> +	PR26756: more prometheus metrics
> +	* run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
> +	and check for new metrics.
> +
>  2020-09-18  Mark Wielaard  <mark@klomp.org>
>  
>  	* run-readelf-compressed-zstd.sh: New test.
> diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
> index 730bb0e10b47..79976f70dc92 100755
> --- a/tests/run-debuginfod-find.sh
> +++ b/tests/run-debuginfod-find.sh
> @@ -95,6 +95,10 @@ wait_ready()
>    fi
>  }
>  
> +# create a 000 empty .rpm file to evoke a metric-visible error
> +touch R/nothing.rpm
> +chmod 000 R/nothing.rpm
> +

Does this cause any issues during cleanup?
I would have expected something like:

if [ -f R/nothing.rpm ]; then chmod 644 R/nothing.rpm; fi

in cleanup(). But maybe the rm -f takes care of that?

Cheers,

Mark

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] PR25756 more debuginfod metrics
  2020-10-21 12:30 ` Mark Wielaard
@ 2020-10-21 14:01   ` Frank Ch. Eigler
  2020-10-21 19:22     ` [PATCH v2] " Frank Ch. Eigler
  0 siblings, 1 reply; 4+ messages in thread
From: Frank Ch. Eigler @ 2020-10-21 14:01 UTC (permalink / raw)
  To: Mark Wielaard; +Cc: elfutils-devel

Hi -

> I found this somewhat hard to review since the indenting isn't
> following the (GNU) style used in the rest of the file. Please place
> curly brackets on their own line when possible.

OK.

> > +2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
> > +
> > +	PR26756: more prometheus metrics
> > +	* debuginfod.cxx (*_exception): Add counters for error occurrences.
> > +	(fdcache::*): Add counters for fdcache operations and status.
> 
> Also a new public set_metrics() function.

OK.

> In general these changes look correct.
> Unfortunately the test crashes on my RHEL7 with DTS9 system:
> [...]
> #11 0x00007f924ecb7ce9 in __run_exit_handlers (status=0, 
>     listp=0x7f924f0456c8 <__exit_funcs>, 
>     run_list_atexit=run_list_atexit@entry=true) at exit.c:77
> #12 0x00007f924ecb7d37 in __GI_exit (status=<optimized out>) at exit.c:99
> #13 0x00007f924eca055c in __libc_start_main (
>     main=0x40ad40 <main(int, char**)>, argc=10, argv=0x7ffc8ebaf6a8, 
>     init=<optimized out>, fini=<optimized out>, rtld_fini=<optimized out>, 
>     stack_end=0x7ffc8ebaf698) at ../csu/libc-start.c:300
> #14 0x000000000040c3e5 in _start ()
>     at /opt/rh/devtoolset-9/root/usr/include/c++/9/bits/stl_tree.h:211
> Which seems to be caused by the libarchive_fdcache destructor calling
> limit (0,0) triggering the new inc_metric which somehow cannot use the
> metrics map?

Ah it's a global variable destructor sequencing issue.
Hmm what a pain, ok will think of a solution.


> > +# create a 000 empty .rpm file to evoke a metric-visible error
> > +touch R/nothing.rpm
> > +chmod 000 R/nothing.rpm
> > +
> 
> Does this cause any issues during cleanup?
> I would have expected something like:
> 
> if [ -f R/nothing.rpm ]; then chmod 644 R/nothing.rpm; fi
> 
> in cleanup(). But maybe the rm -f takes care of that?

There's a "rm -rf R" in cleanup, so is covered.


- FChE


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] PR25756 more debuginfod metrics
  2020-10-21 14:01   ` Frank Ch. Eigler
@ 2020-10-21 19:22     ` Frank Ch. Eigler
  0 siblings, 0 replies; 4+ messages in thread
From: Frank Ch. Eigler @ 2020-10-21 19:22 UTC (permalink / raw)
  To: Mark Wielaard, elfutils-devel

Hi -

    PR26756: add more prometheus metrics to debuginfod
    
    Add an error_count{} family of metrics for each libc/libarchive/http
    exception instance created during operation.  Add a family of fdcache*
    metrics for tracking fdcache operations and status.  Test via a
    injecting a permission-000 empty nothing.rpm in the testsuite.
    
    Signed-off-by: Frank Ch. Eigler <fche@redhat.com>

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 688404e5d567..3341f1876f22 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,11 @@
+2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26756: more prometheus metrics
+	* debuginfod.cxx (*_exception): Add counters for error occurrences.
+	(fdcache::*): Add counters for fdcache operations and status.
+	(fdcache::set_metric): New fn for overall stat counts.
+	(fdcache::limit): ... allow metric-less use from dtors.
+
 2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
 
 	* debuginfod.cxx (handle_buildid*): Add a parameter for detecting
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 7907fecc7419..2b68ff1f052d 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception
 struct libc_exception: public reportable_exception
 {
   libc_exception(int rc, const string& msg):
-    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {}
+    reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {
+    inc_metric("error_count","libc",strerror(rc));
+  }
 };
 
 
 struct archive_exception: public reportable_exception
 {
   archive_exception(const string& msg):
-    reportable_exception(string("libarchive error: ") + msg) {}
+    reportable_exception(string("libarchive error: ") + msg) {
+      inc_metric("error_count","libarchive",msg);
+  }
   archive_exception(struct archive* a, const string& msg):
-    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {}
+    reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {
+    inc_metric("error_count","libarchive",msg);
+  }
 };
 
 
 struct elfutils_exception: public reportable_exception
 {
   elfutils_exception(int rc, const string& msg):
-    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {}
+    reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {
+    inc_metric("error_count","elfutils",elf_errmsg(rc));
+  }
 };
 
 
@@ -1085,6 +1093,15 @@ class libarchive_fdcache
   long max_mbs;
 
 public:
+  void set_metrics()
+  {
+    double total_mb = 0.0;
+    for (auto i = lru.begin(); i < lru.end(); i++)
+      total_mb += i->fd_size_mb;
+    set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0));
+    set_metric("fdcache_count", lru.size());
+  }
+
   void intern(const string& a, const string& b, string fd, off_t sz, bool front_p)
   {
     {
@@ -1095,19 +1112,27 @@ class libarchive_fdcache
             {
               unlink (i->fd.c_str());
               lru.erase(i);
+              inc_metric("fdcache_op_count","op","dequeue");
               break; // must not continue iterating
             }
         }
       double mb = (sz+65535)/1048576.0; // round up to 64K block
       fdcache_entry n = { a, b, fd, mb };
       if (front_p)
-        lru.push_front(n);
+        {
+          inc_metric("fdcache_op_count","op","enqueue_front");
+          lru.push_front(n);
+        }
       else
-        lru.push_back(n);
-    if (verbose > 3)
-      obatched(clog) << "fdcache interned a=" << a << " b=" << b
-                     << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
+        {
+          inc_metric("fdcache_op_count","op","enqueue_back");
+          lru.push_back(n);
+        }
+      if (verbose > 3)
+        obatched(clog) << "fdcache interned a=" << a << " b=" << b
+                       << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl;
     }
+    set_metrics();
 
     // NB: we age the cache at lookup time too
     if (front_p)
@@ -1126,7 +1151,7 @@ class libarchive_fdcache
               fdcache_entry n = *i;
               lru.erase(i); // invalidates i, so no more iteration!
               lru.push_front(n);
-
+              inc_metric("fdcache_op_count","op","requeue_front");
               fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss
               break;
             }
@@ -1145,8 +1170,12 @@ class libarchive_fdcache
     for (auto i = lru.begin(); i < lru.end(); i++)
       {
         if (i->archive == a && i->entry == b)
-          return true;
+          {
+            inc_metric("fdcache_op_count","op","probe_hit");
+            return true;
+          }
       }
+    inc_metric("fdcache_op_count","op","probe_miss");
     return false;
   }
 
@@ -1159,13 +1188,15 @@ class libarchive_fdcache
           { // found it; move it to head of lru
             fdcache_entry n = *i;
             lru.erase(i); // invalidates i, so no more iteration!
+            inc_metric("fdcache_op_count","op","clear");
             unlink (n.fd.c_str());
+            set_metrics();
             return;
           }
       }
   }
 
-  void limit(long maxfds, long maxmbs)
+  void limit(long maxfds, long maxmbs, bool metrics_p = true)
   {
     if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs))
       obatched(clog) << "fdcache limited to maxfds=" << maxfds << " maxmbs=" << maxmbs << endl;
@@ -1190,19 +1221,23 @@ class libarchive_fdcache
                 if (verbose > 3)
                   obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry
                                  << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl;
+                if (metrics_p)
+                  inc_metric("fdcache_op_count","op","evict");
                 unlink (j->fd.c_str());
               }
 
             lru.erase(i, lru.end()); // erase the nodes generally
             break;
           }
-
       }
+    if (metrics_p) set_metrics();
   }
 
   ~libarchive_fdcache()
   {
-    limit(0, 0);
+    // unlink any fdcache entries in $TMPDIR
+    // don't update metrics; those globals may be already destroyed 
+    limit(0, 0, false);
   }
 };
 static libarchive_fdcache fdcache;
@@ -1638,6 +1673,8 @@ handle_buildid (MHD_Connection* conn,
 static map<string,int64_t> metrics; // arbitrary data for /metrics query
 // NB: store int64_t since all our metrics are integers; prometheus accepts double
 static mutex metrics_lock;
+// NB: these objects get released during the process exit via global dtors
+// do not call them from within other global dtors
 
 // utility function for assembling prometheus-compatible
 // name="escaped-value" strings
@@ -2269,7 +2306,7 @@ scan_source_file (const string& rps, const stat_t& st,
               if (verbose > 3)
                 obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
             }
-          
+
           ps_upsert_files
             .reset()
             .bind(1, dwarfsrc_canon)
@@ -2441,7 +2478,7 @@ archive_classify (const string& rps, string& archive_extension,
                       if (verbose > 3)
                         obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl;
                     }
-                  
+
                   ps_upsert_files
                     .reset()
                     .bind(1, dwarfsrc_canon)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 5a8b5899b9c5..aa68ffd383e8 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2020-10-20  Frank Ch. Eigler  <fche@redhat.com>
+
+	PR26756: more prometheus metrics
+	* run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm"
+	and check for new metrics.
+
 2020-09-18  Mark Wielaard  <mark@klomp.org>
 
 	* run-readelf-compressed-zstd.sh: New test.
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 730bb0e10b47..79976f70dc92 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -95,6 +95,10 @@ wait_ready()
   fi
 }
 
+# create a 000 empty .rpm file to evoke a metric-visible error
+touch R/nothing.rpm
+chmod 000 R/nothing.rpm
+
 env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 &
 PID1=$!
 tempfiles vlog4
@@ -240,8 +244,8 @@ fi
 
 cp -rvp ${abs_srcdir}/debuginfod-tars Z
 kill -USR1 $PID1
-# All rpms need to be in the index
-rpms=$(find R -name \*rpm | wc -l)
+# All rpms need to be in the index, except the dummy permission-000 one
+rpms=$(find R -name \*rpm | grep -v nothing | wc -l)
 wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms
 txz=$(find Z -name \*tar.xz | wc -l)
 wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz
@@ -255,7 +259,7 @@ kill -USR1 $PID1  # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr
 mkdir extracted
 cd extracted
 subdir=0;
-newrpms=$(find ../R -name \*\.rpm)
+newrpms=$(find ../R -name \*\.rpm | grep -v nothing)
 for i in $newrpms; do
     subdir=$[$subdir+1];
     mkdir $subdir;
@@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum'
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count'
 curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_'
+curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count'
 
 # And generate a few errors into the second debuginfod's logs, for analysis just below
 curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-10-21 19:22 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-21  0:44 [PATCH] PR25756 more debuginfod metrics Frank Ch. Eigler
2020-10-21 12:30 ` Mark Wielaard
2020-10-21 14:01   ` Frank Ch. Eigler
2020-10-21 19:22     ` [PATCH v2] " Frank Ch. Eigler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).