public inbox for elfutils@sourceware.org
 help / color / mirror / Atom feed
From: Noah Sanci <nsanci@redhat.com>
To: elfutils-devel@sourceware.org
Subject: Re: [Bug debuginfod/27983] ignore duplicate urls
Date: Mon, 26 Jul 2021 12:29:16 -0400	[thread overview]
Message-ID: <CAJXA7qhoaK-CbmBY+7kf0BetQ5F3ORqeCr4oxU3MT9btCx3g7A@mail.gmail.com> (raw)
In-Reply-To: <7b021d11929e451d77961ab183cd97f3329a6dce.camel@klomp.org>

[-- Attachment #1: Type: text/plain, Size: 155 bytes --]

Hello,

The realloc returning NULL issue has been resolved and the patch
successfully rebased onto master. Please find these improvements
attached.


Noah

[-- Attachment #2: 0001-debuginfod-PR27983-ignore-duplicate-urls.patch --]
[-- Type: text/x-patch, Size: 9350 bytes --]

From a1b4c2b87f3890c7bf2339b3a69e056d487f74d3 Mon Sep 17 00:00:00 2001
From: Noah Sanci <nsanci@redhat.com>
Date: Fri, 9 Jul 2021 14:53:10 -0400
Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls

Gazing at server logs, one sees a minority of clients who appear to have
duplicate query traffic coming in: the same URL, milliseconds apart.
Chances are the user accidentally doubled her $DEBUGINFOD_URLS somehow,
and the client library is dutifully asking the servers TWICE. Bug #27863
reduces the pain on the servers' CPU, but dupe network traffic is still
being paid.  We should reject sending outright duplicate concurrent
traffic.
The urls are now simply removed upon finding a duplicate after url
construction.

https://sourceware.org/bugzilla/show_bug.cgi?id=27983

Signed-off-by: Noah Sanci <nsanci@redhat.com>
---
 debuginfod/ChangeLog           |  9 ++++
 debuginfod/debuginfod-client.c | 97 ++++++++++++++++++++++++----------
 tests/ChangeLog                |  6 +++
 tests/run-debuginfod-find.sh   | 11 ++++
 4 files changed, 94 insertions(+), 29 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 7709c24d..81eb56f9 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -34,6 +34,15 @@
 	(parse_opt): Handle 'r' option by setting regex_groom to true.
 	(groom): Introduce and use reg_include and reg_exclude.
 
+2021-07-09  Noah Sanci  <nsanci@redhat.com>
+
+	PR27983
+	* debuginfod-client.c (debuginfod_query_server): As full-length
+	urls are generated with standardized formats, ignore duplicates.
+	Created out1 and changed out2 error gotos. Updated url creation print
+	statements.
+	(globals): Removed url_delim_char, as it was no longer used.
+
 2021-06-18  Mark Wielaard  <mark@klomp.org>
 
 	* debuginfod-client.c (debuginfod_begin): Don't use client if
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 26ba1891..9d049d33 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -152,7 +152,6 @@ static const char *cache_xdg_name = "debuginfod_client";
 
 /* URLs of debuginfods, separated by url_delim. */
 static const char *url_delim =  " ";
-static const char url_delim_char = ' ';
 
 /* Timeout for debuginfods, in seconds (to get at least 100K). */
 static const long default_timeout = 90;
@@ -765,13 +764,60 @@ debuginfod_query_server (debuginfod_client *c,
       goto out0;
     }
 
+  /* Initialize the memory to zero */
+  char *strtok_saveptr;
+  char **server_url_list = NULL;
+  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
   /* Count number of URLs.  */
   int num_urls = 0;
-  for (int i = 0; server_urls[i] != '\0'; i++)
-    if (server_urls[i] != url_delim_char
-        && (i == 0 || server_urls[i - 1] == url_delim_char))
-      num_urls++;
-  
+
+  while (server_url != NULL)
+    {
+      /* PR 27983: If the url is already set to be used use, skip it */
+      char *slashbuildid;
+      if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
+        slashbuildid = "buildid";
+      else
+        slashbuildid = "/buildid";
+
+      char *tmp_url;
+      if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1)
+        {
+          rc = -ENOMEM;
+          goto out1;
+        }
+      int url_index;
+      for (url_index = 0; url_index < num_urls; ++url_index)
+        {
+          if(strcmp(tmp_url, server_url_list[url_index]) == 0)
+            {
+              url_index = -1;
+              break;
+            }
+        }
+      if (url_index == -1)
+        {
+          if (vfd >= 0)
+            dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+          free(tmp_url);
+        }
+      else
+        {
+          num_urls++;
+          char ** realloc_ptr;
+          realloc_ptr = reallocarray(server_url_list, num_urls,
+                                         sizeof(char*));
+          if (realloc_ptr == NULL)
+            {
+              rc = -ENOMEM;
+              goto out1;
+            }
+          server_url_list = realloc_ptr;
+          server_url_list[num_urls-1] = tmp_url;
+        }
+      server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+    }
+
   int retry_limit = default_retry_limit;
   const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR);
   if (retry_limit_envvar != NULL)
@@ -804,12 +850,11 @@ debuginfod_query_server (debuginfod_client *c,
       data[i].errbuf[0] = '\0';
     }
 
-  char *strtok_saveptr;
-  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
-
   /* Initialize each handle.  */
-  for (int i = 0; i < num_urls && server_url != NULL; i++)
+  for (int i = 0; i < num_urls; i++)
     {
+      if ((server_url = server_url_list[i]) == NULL)
+        break;
       if (vfd >= 0)
 	dprintf (vfd, "init server %d %s\n", i, server_url);
 
@@ -819,18 +864,10 @@ debuginfod_query_server (debuginfod_client *c,
       if (data[i].handle == NULL)
         {
           rc = -ENETUNREACH;
-          goto out1;
+          goto out2;
         }
       data[i].client = c;
 
-      /* Build handle url. Tolerate both  http://foo:999  and
-         http://foo:999/  forms */
-      char *slashbuildid;
-      if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
-        slashbuildid = "buildid";
-      else
-        slashbuildid = "/buildid";
-
       if (filename) /* must start with / */
         {
           /* PR28034 escape characters in completed url to %hh format. */
@@ -841,14 +878,12 @@ debuginfod_query_server (debuginfod_client *c,
               rc = -ENOMEM;
               goto out1;
             }
-          snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url,
-                   slashbuildid, build_id_bytes, type, escaped_string);
+          snprintf(data[i].url, PATH_MAX, "%s/%s/%s/%s", server_url,
+                   build_id_bytes, type, escaped_string);
           curl_free(escaped_string);
         }
       else
-        snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url,
-                 slashbuildid, build_id_bytes, type);
-
+        snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type);
       if (vfd >= 0)
 	dprintf (vfd, "url %d %s\n", i, data[i].url);
 
@@ -883,7 +918,6 @@ debuginfod_query_server (debuginfod_client *c,
       curl_easy_setopt(data[i].handle, CURLOPT_HTTPHEADER, c->headers);
 
       curl_multi_add_handle(curlm, data[i].handle);
-      server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
     }
 
   /* Query servers in parallel.  */
@@ -927,7 +961,7 @@ debuginfod_query_server (debuginfod_client *c,
             case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break;
             default: rc = -ENETUNREACH; break;
             }
-          goto out1;
+          goto out2;
         }
 
       if (c->progressfn) /* inform/check progress callback */
@@ -1115,7 +1149,7 @@ debuginfod_query_server (debuginfod_client *c,
 	    goto query_in_parallel;
 	}
       else
-	goto out1;
+	goto out2;
     }
 
   if (vfd >= 0)
@@ -1145,7 +1179,7 @@ debuginfod_query_server (debuginfod_client *c,
   if (rc < 0)
     {
       rc = -errno;
-      goto out1;
+      goto out2;
       /* Perhaps we need not give up right away; could retry or something ... */
     }
 
@@ -1168,7 +1202,7 @@ debuginfod_query_server (debuginfod_client *c,
   goto out;
 
 /* error exits */
- out1:
+ out2:
   /* remove all handles from multi */
   for (int i = 0; i < num_urls; i++)
     {
@@ -1181,6 +1215,11 @@ debuginfod_query_server (debuginfod_client *c,
   (void) rmdir (target_cache_dir); /* nop if not empty */
   free(data);
 
+ out1:
+  for (int i = 0; i < num_urls; ++i)
+    free(server_url_list[i]);
+  free(server_url_list);
+
  out0:
   free (server_urls);
 
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 3be9ee48..dba750e4 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -30,6 +30,12 @@
 	* run-debuginfod-find.sh: Added test case for grooming the database
 	using regexes. 
 
+2021-07-09  Noah Sanci  <nsanci@redhat.com>
+
+	PR27983
+	* run-debuginfod-find.sh: Wrote test to ensure duplicate urls are in
+	fact not checked.
+
 2021-07-08  Mark Wielaard  <mark@klomp.org>
 
 	* Makefile.am (EXTRA_DIST): Fix typo testfile-largealign.bz2 was
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 947c1261..c2c3b9c3 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -364,6 +364,17 @@ rm -rf extracted
 
 wait_ready $PORT1 'found_sourcerefs_total{source=".rpm archive"}' $sourcefiles
 
+########################################################################
+# PR27983 ensure no duplicate urls are used in when querying servers for files
+rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
+export DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http:127.0.0.1:7999"
+env LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find -v executable $BUILDID2 > vlog4 2>&1 || true
+tempfiles vlog4
+if [ $( grep -c 'duplicate url: http://127.0.0.1:'$PORT1'.*' vlog4 ) -ne 2 ]; then
+  echo "Duplicated servers remain";
+  err
+fi
+########################################################################
 # Run a bank of queries against the debuginfod-rpms / debuginfod-debs test cases
 
 archive_test() {
-- 
2.31.1


  reply	other threads:[~2021-07-26 16:29 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-09 19:12 Noah Sanci
2021-07-14 16:36 ` Mark Wielaard
2021-07-19 13:31   ` Noah Sanci
2021-07-20 14:42     ` Mark Wielaard
2021-07-22 16:25       ` Noah Sanci
2021-07-22 19:22         ` Noah Sanci
2021-07-23 18:34           ` Mark Wielaard
2021-07-26 16:29             ` Noah Sanci [this message]
2021-07-28 14:55               ` Mark Wielaard
2021-07-28 16:23                 ` Noah Sanci
2021-07-29 12:20                   ` Mark Wielaard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=CAJXA7qhoaK-CbmBY+7kf0BetQ5F3ORqeCr4oxU3MT9btCx3g7A@mail.gmail.com \
    --to=nsanci@redhat.com \
    --cc=elfutils-devel@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).