From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [63.128.21.124]) by sourceware.org (Postfix) with ESMTP id 4530D385780D for ; Wed, 21 Oct 2020 00:45:05 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 4530D385780D Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-98-XYZ9UAGyOiGKFxrzP1XOZw-1; Tue, 20 Oct 2020 20:45:01 -0400 X-MC-Unique: XYZ9UAGyOiGKFxrzP1XOZw-1 Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id 9C9C88030C0 for ; Wed, 21 Oct 2020 00:45:00 +0000 (UTC) Received: from redhat.com (ovpn-112-64.phx2.redhat.com [10.3.112.64]) by smtp.corp.redhat.com (Postfix) with ESMTPS id 689BB5C1C2 for ; Wed, 21 Oct 2020 00:45:00 +0000 (UTC) Received: from fche by redhat.com with local (Exim 4.94) (envelope-from ) id 1kV2FP-0005ea-2t for elfutils-devel@sourceware.org; Tue, 20 Oct 2020 20:44:59 -0400 Date: Tue, 20 Oct 2020 20:44:59 -0400 From: "Frank Ch. Eigler" To: elfutils-devel@sourceware.org Subject: [PATCH] PR25756 more debuginfod metrics Message-ID: <20201021004459.GA21732@redhat.com> MIME-Version: 1.0 User-Agent: Mutt/1.12.0 (2019-05-25) X-Scanned-By: MIMEDefang 2.79 on 10.5.11.16 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain; charset=us-ascii Content-Disposition: inline X-Spam-Status: No, score=-12.2 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, KAM_BADIPHTTP, RCVD_IN_DNSWL_NONE, RCVD_IN_MSPIKE_H5, RCVD_IN_MSPIKE_WL, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: elfutils-devel@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Elfutils-devel mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 21 Oct 2020 00:45:07 -0000 Subject: [PATCH 1/2] PR26756: add more prometheus metrics to debuginfod From: Frank Ch. Eigler Add an error_count{} family of metrics for each libc/libarchive/http exception instance created during operation. Add a family of fdcache* metrics for tracking fdcache operations and status. Test via a injecting a permission-000 empty nothing.rpm in the testsuite. Signed-off-by: Frank Ch. Eigler --- debuginfod/ChangeLog | 6 ++++ debuginfod/debuginfod.cxx | 56 +++++++++++++++++++++++++++--------- tests/ChangeLog | 6 ++++ tests/run-debuginfod-find.sh | 12 ++++++-- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 8cb89967e9d1..236e2ca3270f 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,9 @@ +2020-10-20 Frank Ch. Eigler + + PR26756: more prometheus metrics + * debuginfod.cxx (*_exception): Add counters for error occurrences. + (fdcache::*): Add counters for fdcache operations and status. + 2020-09-18 Frank Ch. Eigler * debuginfod.cxx (scan_source_file, archive_classify): Store only diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 140b7789de3b..a5334d2a7fc5 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -548,23 +548,31 @@ struct sqlite_exception: public reportable_exception struct libc_exception: public reportable_exception { libc_exception(int rc, const string& msg): - reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) {} + reportable_exception(string("libc error: ") + msg + ": " + string(strerror(rc) ?: "?")) { + inc_metric("error_count","libc",strerror(rc)); + } }; struct archive_exception: public reportable_exception { archive_exception(const string& msg): - reportable_exception(string("libarchive error: ") + msg) {} + reportable_exception(string("libarchive error: ") + msg) { + inc_metric("error_count","libarchive",msg); + } archive_exception(struct archive* a, const string& msg): - reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) {} + reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) { + inc_metric("error_count","libarchive",msg); + } }; struct elfutils_exception: public reportable_exception { elfutils_exception(int rc, const string& msg): - reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) {} + reportable_exception(string("elfutils error: ") + msg + ": " + string(elf_errmsg(rc) ?: "?")) { + inc_metric("error_count","elfutils",elf_errmsg(rc)); + } }; @@ -1083,6 +1091,15 @@ class libarchive_fdcache long max_mbs; public: + void set_metrics() + { + double total_mb = 0.0; + for (auto i = lru.begin(); i < lru.end(); i++) + total_mb += i->fd_size_mb; + set_metric("fdcache_bytes", (int64_t)(total_mb*1024.0*1024.0)); + set_metric("fdcache_count", lru.size()); + } + void intern(const string& a, const string& b, string fd, off_t sz, bool front_p) { { @@ -1093,19 +1110,24 @@ class libarchive_fdcache { unlink (i->fd.c_str()); lru.erase(i); + inc_metric("fdcache_op_count","op","dequeue"); break; // must not continue iterating } } double mb = (sz+65535)/1048576.0; // round up to 64K block fdcache_entry n = { a, b, fd, mb }; - if (front_p) + if (front_p) { + inc_metric("fdcache_op_count","op","enqueue_front"); lru.push_front(n); - else + } else { + inc_metric("fdcache_op_count","op","enqueue_back"); lru.push_back(n); - if (verbose > 3) - obatched(clog) << "fdcache interned a=" << a << " b=" << b - << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl; + } + if (verbose > 3) + obatched(clog) << "fdcache interned a=" << a << " b=" << b + << " fd=" << fd << " mb=" << mb << " front=" << front_p << endl; } + set_metrics(); // NB: we age the cache at lookup time too if (front_p) @@ -1124,7 +1146,7 @@ class libarchive_fdcache fdcache_entry n = *i; lru.erase(i); // invalidates i, so no more iteration! lru.push_front(n); - + inc_metric("fdcache_op_count","op","requeue_front"); fd = open(n.fd.c_str(), O_RDONLY); // NB: no problem if dup() fails; looks like cache miss break; } @@ -1142,9 +1164,12 @@ class libarchive_fdcache unique_lock lock(fdcache_lock); for (auto i = lru.begin(); i < lru.end(); i++) { - if (i->archive == a && i->entry == b) + if (i->archive == a && i->entry == b) { + inc_metric("fdcache_op_count","op","probe_hit"); return true; + } } + inc_metric("fdcache_op_count","op","probe_miss"); return false; } @@ -1157,7 +1182,9 @@ class libarchive_fdcache { // found it; move it to head of lru fdcache_entry n = *i; lru.erase(i); // invalidates i, so no more iteration! + inc_metric("fdcache_op_count","op","clear"); unlink (n.fd.c_str()); + set_metrics(); return; } } @@ -1188,14 +1215,15 @@ class libarchive_fdcache if (verbose > 3) obatched(clog) << "fdcache evicted a=" << j->archive << " b=" << j->entry << " fd=" << j->fd << " mb=" << j->fd_size_mb << endl; + inc_metric("fdcache_op_count","op","evict"); unlink (j->fd.c_str()); } lru.erase(i, lru.end()); // erase the nodes generally break; } - } + set_metrics(); } ~libarchive_fdcache() @@ -2263,7 +2291,7 @@ scan_source_file (const string& rps, const stat_t& st, if (verbose > 3) obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl; } - + ps_upsert_files .reset() .bind(1, dwarfsrc_canon) @@ -2435,7 +2463,7 @@ archive_classify (const string& rps, string& archive_extension, if (verbose > 3) obatched(clog) << "canonicalized src=" << dwarfsrc << " alias=" << dwarfsrc_canon << endl; } - + ps_upsert_files .reset() .bind(1, dwarfsrc_canon) diff --git a/tests/ChangeLog b/tests/ChangeLog index 5a8b5899b9c5..aa68ffd383e8 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2020-10-20 Frank Ch. Eigler + + PR26756: more prometheus metrics + * run-debuginfod-find.sh: Trigger some errors with dummy "nothing.rpm" + and check for new metrics. + 2020-09-18 Mark Wielaard * run-readelf-compressed-zstd.sh: New test. diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 730bb0e10b47..79976f70dc92 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -95,6 +95,10 @@ wait_ready() fi } +# create a 000 empty .rpm file to evoke a metric-visible error +touch R/nothing.rpm +chmod 000 R/nothing.rpm + env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 & PID1=$! tempfiles vlog4 @@ -240,8 +244,8 @@ fi cp -rvp ${abs_srcdir}/debuginfod-tars Z kill -USR1 $PID1 -# All rpms need to be in the index -rpms=$(find R -name \*rpm | wc -l) +# All rpms need to be in the index, except the dummy permission-000 one +rpms=$(find R -name \*rpm | grep -v nothing | wc -l) wait_ready $PORT1 'scanned_total{source=".rpm archive"}' $rpms txz=$(find Z -name \*tar.xz | wc -l) wait_ready $PORT1 'scanned_total{source=".tar.xz archive"}' $txz @@ -255,7 +259,7 @@ kill -USR1 $PID1 # two hits of SIGUSR1 may be needed to resolve .debug->dwz->sr mkdir extracted cd extracted subdir=0; -newrpms=$(find ../R -name \*\.rpm) +newrpms=$(find ../R -name \*\.rpm | grep -v nothing) for i in $newrpms; do subdir=$[$subdir+1]; mkdir $subdir; @@ -440,6 +444,8 @@ curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_millisec curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_duration_milliseconds_sum' curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_count' curl -s http://127.0.0.1:$PORT1/metrics | grep 'http_responses_transfer_bytes_sum' +curl -s http://127.0.0.1:$PORT1/metrics | grep 'fdcache_' +curl -s http://127.0.0.1:$PORT1/metrics | grep 'error_count' # And generate a few errors into the second debuginfod's logs, for analysis just below curl -s http://127.0.0.1:$PORT2/badapi > /dev/null || true -- 2.26.2