public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
* [Bug debuginfod/27983] ignore duplicate urls
@ 2021-07-09 19:12 Noah Sanci
  2021-07-14 16:36 ` Mark Wielaard
  0 siblings, 1 reply; 11+ messages in thread
From: Noah Sanci @ 2021-07-09 19:12 UTC (permalink / raw)
  To: elfutils-devel

[-- Attachment #1: Type: text/plain, Size: 60 bytes --]

Hello,

Please find the patch for bug 27983 attached.

Noah

[-- Attachment #2: 0001-debuginfod-PR27983-ignore-duplicate-urls.patch --]
[-- Type: text/x-patch, Size: 6416 bytes --]

From e37f49a0fd5f27907584b19336cd250d825acc98 Mon Sep 17 00:00:00 2001
From: Noah Sanci <nsanci@redhat.com>
Date: Fri, 9 Jul 2021 14:53:10 -0400
Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls

Gazing at server logs, one sees a minority of clients who appear to have
duplicate query traffic coming in: the same URL, milliseconds apart.
Chances are the user accidentally doubled her $DEBUGINFOD_URLS somehow,
and the client library is dutifully asking the servers TWICE. Bug #27863
reduces the pain on the servers' CPU, but dupe network traffic is still
being paid.  We should reject sending outright duplicate concurrent
traffic.

https://sourceware.org/bugzilla/show_bug.cgi?id=27983

Signed-off-by: Noah Sanci <nsanci@redhat.com>
---
 debuginfod/ChangeLog           |  7 +++++
 debuginfod/debuginfod-client.c | 56 +++++++++++++++++++++++++---------
 tests/ChangeLog                |  5 +++
 tests/run-debuginfod-find.sh   | 13 ++++++++
 4 files changed, 67 insertions(+), 14 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index d9d11737..24ccb8ef 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,10 @@
+2021-07-09  Noah Sanci  <nsanci@redhat.com>
+
+	* debuginfod-client.c (debuginfod_query_server): As full-length
+	urls are generated with standardized formats, ignore duplicates.
+	Also update the number of urls to the unduplicated number of
+	urls.
+
 2021-06-18  Mark Wielaard  <mark@klomp.org>
 
 	* debuginfod-client.c (debuginfod_begin): Don't use client if
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index f417b40a..88e154d2 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -795,6 +795,7 @@ debuginfod_query_server (debuginfod_client *c,
 
   char *strtok_saveptr;
   char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
+  int unduplicated_urls = 0;
 
   /* Initialize each handle.  */
   for (int i = 0; i < num_urls && server_url != NULL; i++)
@@ -802,34 +803,59 @@ debuginfod_query_server (debuginfod_client *c,
       if (vfd >= 0)
 	dprintf (vfd, "init server %d %s\n", i, server_url);
 
-      data[i].fd = fd;
-      data[i].target_handle = &target_handle;
-      data[i].handle = curl_easy_init();
-      if (data[i].handle == NULL)
-        {
-          rc = -ENETUNREACH;
-          goto out1;
-        }
-      data[i].client = c;
-
-      /* Build handle url. Tolerate both  http://foo:999  and
-         http://foo:999/  forms */
       char *slashbuildid;
       if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
         slashbuildid = "buildid";
       else
         slashbuildid = "/buildid";
 
+      char *tmp_url = calloc(PATH_MAX, sizeof(char));
       if (filename) /* must start with / */
-        snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url,
+        snprintf(tmp_url, PATH_MAX, "%s%s/%s/%s%s", server_url,
                  slashbuildid, build_id_bytes, type, filename);
       else
-        snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url,
+        snprintf(tmp_url, PATH_MAX, "%s%s/%s/%s", server_url,
                  slashbuildid, build_id_bytes, type);
 
+      /* PR 27983: If the url is already set to be used use, skip it */
+      int url_index = -1;
+      for (url_index = 0; url_index < i; ++url_index)
+        {
+          if(strcmp(tmp_url, data[url_index].url) == 0)
+            {
+              url_index = -1;
+              break;
+            }
+        }
+      if (url_index == -1)
+        {
+          if (vfd >= 0)
+            dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+          free(tmp_url);
+          // Ensure that next iteration doesn't skip over an index mid-array
+          i--;
+          server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+          continue;
+        }
+      else
+        {
+          unduplicated_urls++;
+          strncpy(data[i].url, tmp_url, PATH_MAX);
+          free (tmp_url);
+        }
+
       if (vfd >= 0)
 	dprintf (vfd, "url %d %s\n", i, data[i].url);
 
+      data[i].fd = fd;
+      data[i].target_handle = &target_handle;
+      data[i].handle = curl_easy_init();
+      if (data[i].handle == NULL)
+        {
+          rc = -ENETUNREACH;
+           goto out1;
+        }
+      data[i].client = c;
       curl_easy_setopt(data[i].handle, CURLOPT_URL, data[i].url);
       if (vfd >= 0)
 	curl_easy_setopt(data[i].handle, CURLOPT_ERRORBUFFER, data[i].errbuf);
@@ -863,6 +889,8 @@ debuginfod_query_server (debuginfod_client *c,
       curl_multi_add_handle(curlm, data[i].handle);
       server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
     }
+  num_urls = unduplicated_urls;
+  data = reallocarray( (void *) data, num_urls, sizeof(struct handle_data));
 
   /* Query servers in parallel.  */
   if (vfd >= 0)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index b65cbeb7..5747d658 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2021-07-09  Noah Sanci  <nsanci@redhat.com>
+
+	* run-debuginfod-find.sh: Wrote test to ensure duplicate urls are in
+	fact not checked.
+
 2021-07-02  Mark Wielaard  <mark@klomp.org>
 
        * run-debuginfo-find.sh: unset VALGRIND_CMD before testing debuginfod
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 74a5ceff..6faaf70b 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -359,6 +359,19 @@ rm -rf extracted
 
 wait_ready $PORT1 'found_sourcerefs_total{source=".rpm archive"}' $sourcefiles
 
+########################################################################
+# PR27983 ensure no duplicate urls are used in when querying servers for files
+TMP_DEBUG_URLS=$DEBUGINFOD_URLS
+rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
+export DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http:127.0.0.1:7999"
+env LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find -v executable $BUILDID2 > vlog4 2>&1 || true
+tempfiles vlog4
+if [ $( grep -c 'duplicate url: http://127.0.0.1:'$PORT1'.*' vlog4 ) -ne 2 ]; then
+  echo "Duplicated servers undetected";
+  err
+fi
+export DEBUGINFOD_URLS=$TMP_DEBUG_URLS
+########################################################################
 # Run a bank of queries against the debuginfod-rpms / debuginfod-debs test cases
 
 archive_test() {
-- 
2.31.1


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2021-07-29 12:20 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-09 19:12 [Bug debuginfod/27983] ignore duplicate urls Noah Sanci
2021-07-14 16:36 ` Mark Wielaard
2021-07-19 13:31   ` Noah Sanci
2021-07-20 14:42     ` Mark Wielaard
2021-07-22 16:25       ` Noah Sanci
2021-07-22 19:22         ` Noah Sanci
2021-07-23 18:34           ` Mark Wielaard
2021-07-26 16:29             ` Noah Sanci
2021-07-28 14:55               ` Mark Wielaard
2021-07-28 16:23                 ` Noah Sanci
2021-07-29 12:20                   ` Mark Wielaard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).