From be4e07a8732dd688595b99f92ba275ef5060a34d Mon Sep 17 00:00:00 2001
From: Noah Sanci
Date: Fri, 9 Jul 2021 14:53:10 -0400
Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls

Gazing at server logs, one sees a minority of clients who appear to
have duplicate query traffic coming in: the same URL, milliseconds
apart.  Chances are the user accidentally doubled her $DEBUGINFOD_URLS
somehow, and the client library is dutifully asking the servers TWICE.
Bug #27863 reduces the pain on the servers' CPU, but the duplicate
network traffic is still being paid.  We should refrain from sending
outright duplicate concurrent requests.

https://sourceware.org/bugzilla/show_bug.cgi?id=27983

Signed-off-by: Noah Sanci
---
 debuginfod/ChangeLog           |  7 +++++
 debuginfod/debuginfod-client.c | 63 ++++++++++++++++++++++++++---------
 tests/ChangeLog                |  5 ++++
 tests/run-debuginfod-find.sh   | 13 ++++++++
 4 files changed, 74 insertions(+), 14 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index d9d11737..24ccb8ef 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,10 @@
+2021-07-09  Noah Sanci
+
+	* debuginfod-client.c (debuginfod_query_server): As full-length
+	urls are generated with standardized formats, duplicates can be
+	detected and skipped.  Also update num_urls to the number of
+	unduplicated urls.
+
 2021-06-18  Mark Wielaard
 
 	* debuginfod-client.c (debuginfod_begin): Don't use client if
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index f417b40a..a9447cae 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -795,6 +795,7 @@ debuginfod_query_server (debuginfod_client *c,
 
   char *strtok_saveptr;
   char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
+  int unduplicated_urls = 0;
 
   /* Initialize each handle.  */
   for (int i = 0; i < num_urls && server_url != NULL; i++)
@@ -802,34 +803,67 @@ debuginfod_query_server (debuginfod_client *c,
       if (vfd >= 0)
 	dprintf (vfd, "init server %d %s\n", i, server_url);
 
-      data[i].fd = fd;
-      data[i].target_handle = &target_handle;
-      data[i].handle = curl_easy_init();
-      if (data[i].handle == NULL)
-        {
-          rc = -ENETUNREACH;
-          goto out1;
-        }
-      data[i].client = c;
-
-      /* Build handle url. Tolerate both  http://foo:999  and
-         http://foo:999/  forms */
       char *slashbuildid;
       if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
         slashbuildid = "buildid";
       else
         slashbuildid = "/buildid";
 
+      /* Build the full query url up front so it can be compared against
+         the urls already queued.  Tolerate both  http://foo:999  and
+         http://foo:999/  forms.  */
+      char *tmp_url = calloc(PATH_MAX, sizeof(char));
+      if (tmp_url == NULL)
+        {
+          rc = -ENOMEM;
+          goto out1;
+        }
       if (filename) /* must start with / */
-        snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url,
+        snprintf(tmp_url, PATH_MAX, "%s%s/%s/%s%s", server_url,
                  slashbuildid, build_id_bytes, type, filename);
       else
-        snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url,
+        snprintf(tmp_url, PATH_MAX, "%s%s/%s/%s", server_url,
                  slashbuildid, build_id_bytes, type);
 
+      /* PR 27983: If this url is already queued for use, skip it.  */
+      int url_index = -1;
+      for (url_index = 0; url_index < i; ++url_index)
+        {
+          if (strcmp(tmp_url, data[url_index].url) == 0)
+            {
+              url_index = -1;  /* mark as a duplicate */
+              break;
+            }
+        }
+      if (url_index == -1)
+        {
+          if (vfd >= 0)
+            dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+          free(tmp_url);
+          /* Decrement i so the skipped url leaves no hole in data[].  */
+          i--;
+          server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+          continue;
+        }
+      else
+        {
+          unduplicated_urls++;
+          strncpy(data[i].url, tmp_url, PATH_MAX);
+          free(tmp_url);
+        }
+
       if (vfd >= 0)
 	dprintf (vfd, "url %d %s\n", i, data[i].url);
 
+      data[i].fd = fd;
+      data[i].target_handle = &target_handle;
+      data[i].handle = curl_easy_init();
+      if (data[i].handle == NULL)
+        {
+          rc = -ENETUNREACH;
+          goto out1;
+        }
+      data[i].client = c;
       curl_easy_setopt(data[i].handle, CURLOPT_URL, data[i].url);
       if (vfd >= 0)
 	curl_easy_setopt(data[i].handle, CURLOPT_ERRORBUFFER, data[i].errbuf);
@@ -863,6 +889,7 @@ debuginfod_query_server (debuginfod_client *c,
       curl_multi_add_handle(curlm, data[i].handle);
       server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
     }
+  num_urls = unduplicated_urls;
 
   /* Query servers in parallel.  */
   if (vfd >= 0)
diff --git a/tests/ChangeLog b/tests/ChangeLog
index b65cbeb7..5747d658 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,8 @@
+2021-07-09  Noah Sanci
+
+	* run-debuginfod-find.sh: Wrote test to ensure duplicate urls are
+	in fact not queried.
+
 2021-07-02  Mark Wielaard
 
 	* run-debuginfo-find.sh: unset VALGRIND_CMD before testing debuginfod
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 74a5ceff..6faaf70b 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -359,6 +359,19 @@ rm -rf extracted
 
 wait_ready $PORT1 'found_sourcerefs_total{source=".rpm archive"}' $sourcefiles
 
+########################################################################
+# PR27983: ensure no duplicate urls are used when querying servers for files
+TMP_DEBUG_URLS=$DEBUGINFOD_URLS
+rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
+export DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http:127.0.0.1:7999"
+env LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find -v executable $BUILDID2 > vlog4 2>&1 || true
+tempfiles vlog4
+if [ $( grep -c 'duplicate url: http://127.0.0.1:'$PORT1'.*' vlog4 ) -ne 2 ]; then
+  echo "Duplicated servers undetected";
+  err
+fi
+export DEBUGINFOD_URLS=$TMP_DEBUG_URLS
+########################################################################
 # Run a bank of queries against the debuginfod-rpms / debuginfod-debs test cases
 
 archive_test() {
-- 
2.31.1
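P.S. For anyone who wants to poke at the deduplication idea outside the
client, here is a minimal standalone sketch.  It is not part of the patch
or of the elfutils API: dedup_urls, MAX_URLS, and the sample urls are
made-up names for illustration, and unlike the real client (which compares
the fully built buildid query urls) it compares the raw server urls only.

/* Illustrative sketch only, not elfutils code: dedup a space-separated
   url list the same way the patch does -- strtok_r to walk the list,
   strcmp against everything kept so far, skip repeats.  */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_URLS 16   /* illustrative cap, plays the role of num_urls */

static int
dedup_urls (char *urls, char *kept[], int max)
{
  int n = 0;
  char *saveptr;
  for (char *tok = strtok_r (urls, " ", &saveptr);
       tok != NULL && n < max;
       tok = strtok_r (NULL, " ", &saveptr))
    {
      int dup = 0;
      for (int j = 0; j < n; j++)
        if (strcmp (tok, kept[j]) == 0)
          {
            dup = 1;  /* same full url already queued: skip it */
            break;
          }
      if (dup)
        fprintf (stderr, "duplicate url: %s, skipping\n", tok);
      else
        kept[n++] = tok;  /* tokens live inside the caller's buffer */
    }
  return n;  /* plays the role of unduplicated_urls */
}

int
main (void)
{
  char urls[] = "http://127.0.0.1:8002 http://127.0.0.1:8002"
                " http://example.com:8002";
  char *kept[MAX_URLS];
  int n = dedup_urls (urls, kept, MAX_URLS);
  for (int i = 0; i < n; i++)
    printf ("url %d %s\n", i, kept[i]);
  return 0;
}

Compiled with plain gcc and run, this keeps two of the three urls and logs
one "duplicate url: ..., skipping" line, mirroring the verbose-log branch
the test above greps for.  The sketch tokenizes in place instead of
allocating a temporary url, so it has no analogue of the free(tmp_url) on
the skip path.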