From b9d36857b6b7cf4f2889280119ba01628afe2420 Mon Sep 17 00:00:00 2001
From: Noah Sanci
Date: Fri, 9 Jul 2021 14:53:10 -0400
Subject: [PATCH] debuginfod: PR27983 - ignore duplicate urls

Gazing at server logs, one sees a minority of clients who appear to
send duplicate query traffic: the same URL, milliseconds apart.
Chances are the user accidentally doubled her $DEBUGINFOD_URLS
somehow, and the client library is dutifully asking the servers
TWICE.  Bug #27863 reduces the pain on the servers' CPU, but the
duplicate network traffic is still being paid.  We should reject
sending outright duplicate concurrent traffic, so duplicate urls
are now simply dropped once the full urls have been constructed.

https://sourceware.org/bugzilla/show_bug.cgi?id=27983

Signed-off-by: Noah Sanci
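For reviewers, a minimal standalone sketch of the idea (illustration
only, not the library code; the sample URLs are made up): each
space-separated server URL is canonicalized to its full ".../buildid"
prefix, and a URL is dropped when that canonical form is already in
the list, which is the same comparison the patch performs after URL
construction.

  /* Sketch of the deduplication idea; sample URLs are made up.  */
  #define _GNU_SOURCE /* asprintf */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  int
  main (void)
  {
    /* Same server twice, with and without a trailing slash.  */
    char urls[] = "http://example:8002 http://example:8002/ http://other:8002";
    char *saveptr;
    char **list = NULL;
    int n = 0;

    for (char *u = strtok_r (urls, " ", &saveptr); u != NULL;
         u = strtok_r (NULL, " ", &saveptr))
      {
        /* Tolerate both http://foo:999 and http://foo:999/ forms.  */
        const char *suffix = (u[strlen (u) - 1] == '/') ? "buildid" : "/buildid";
        char *full;
        if (asprintf (&full, "%s%s", u, suffix) == -1)
          abort ();

        /* Skip the token if its canonical form is already listed.  */
        int dup = 0;
        for (int i = 0; i < n && !dup; i++)
          dup = (strcmp (full, list[i]) == 0);
        if (dup)
          {
            printf ("duplicate url: %s, skipping\n", full);
            free (full);
            continue;
          }

        char **tmp = realloc (list, (n + 1) * sizeof (char *));
        if (tmp == NULL)
          abort ();
        list = tmp;
        list[n++] = full;
      }

    for (int i = 0; i < n; i++)
      {
        printf ("will query %s\n", list[i]);
        free (list[i]);
      }
    free (list);
    return 0;
  }

Comparing the fully constructed urls, rather than the raw
$DEBUGINFOD_URLS tokens, is what lets http://example:8002 and
http://example:8002/ collapse into a single query.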
---
 debuginfod/ChangeLog           |   9 +++
 debuginfod/debuginfod-client.c | 104 +++++++++++++++++++++++----------
 tests/ChangeLog                |   6 ++
 tests/run-debuginfod-find.sh   |  11 ++++
 4 files changed, 99 insertions(+), 31 deletions(-)

diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index 7709c24d..81eb56f9 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -34,6 +34,15 @@
 	(parse_opt): Handle 'r' option by setting regex_groom to true.
 	(groom): Introduce and use reg_include and reg_exclude.
 
+2021-07-09  Noah Sanci
+
+	PR27983
+	* debuginfod-client.c (debuginfod_query_server): As full-length
+	urls are generated in a standardized format, detect and skip
+	duplicates.  Renamed the out1 cleanup label to out2, added a new
+	out1 label that frees the url list, and updated the url printing.
+	(globals): Removed url_delim_char, as it is no longer used.
+
 2021-06-18  Mark Wielaard
 
 	* debuginfod-client.c (debuginfod_begin): Don't use client if
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index 26ba1891..5afefbfa 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -152,7 +152,6 @@ static const char *cache_xdg_name = "debuginfod_client";
 
 /* URLs of debuginfods, separated by url_delim.  */
 static const char *url_delim =  " ";
-static const char url_delim_char = ' ';
 
 /* Timeout for debuginfods, in seconds (to get at least 100K).  */
 static const long default_timeout = 90;
@@ -765,13 +764,60 @@ debuginfod_query_server (debuginfod_client *c,
       goto out0;
     }
 
+  /* Tokenize $DEBUGINFOD_URLS into a deduplicated list of full urls.  */
+  char *strtok_saveptr;
+  char **server_url_list = NULL;
+  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
   /* Count number of URLs.  */
   int num_urls = 0;
-  for (int i = 0; server_urls[i] != '\0'; i++)
-    if (server_urls[i] != url_delim_char
-        && (i == 0 || server_urls[i - 1] == url_delim_char))
-      num_urls++;
-
+
+  while (server_url != NULL)
+    {
+      /* PR 27983: If the url is already in the list, skip adding it again.  */
+      char *slashbuildid;
+      if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
+        slashbuildid = "buildid";
+      else
+        slashbuildid = "/buildid";
+
+      char *tmp_url;
+      if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1)
+        {
+          rc = -ENOMEM;
+          goto out1;
+        }
+      int url_index;
+      for (url_index = 0; url_index < num_urls; ++url_index)
+        {
+          if (strcmp(tmp_url, server_url_list[url_index]) == 0)
+            {
+              url_index = -1;
+              break;
+            }
+        }
+      if (url_index == -1)
+        {
+          if (vfd >= 0)
+            dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
+          free(tmp_url);
+        }
+      else
+        {
+          num_urls++;
+          char **realloc_ptr;
+          realloc_ptr = reallocarray(server_url_list, num_urls,
+                                     sizeof(char*));
+          if (realloc_ptr == NULL)
+            {
+              rc = -ENOMEM;
+              goto out1;
+            }
+          server_url_list = realloc_ptr;
+          server_url_list[num_urls-1] = tmp_url;
+        }
+      server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
+    }
+
   int retry_limit = default_retry_limit;
   const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR);
   if (retry_limit_envvar != NULL)
@@ -804,12 +850,11 @@ debuginfod_query_server (debuginfod_client *c,
 	  data[i].errbuf[0] = '\0';
 	}
 
-  char *strtok_saveptr;
-  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
-
   /* Initialize each handle.  */
-  for (int i = 0; i < num_urls && server_url != NULL; i++)
+  for (int i = 0; i < num_urls; i++)
     {
+      if ((server_url = server_url_list[i]) == NULL)
+        break;
       if (vfd >= 0)
 	dprintf (vfd, "init server %d %s\n", i, server_url);
 
@@ -819,18 +864,10 @@ debuginfod_query_server (debuginfod_client *c,
       if (data[i].handle == NULL)
 	{
 	  rc = -ENETUNREACH;
-	  goto out1;
+	  goto out2;
 	}
       data[i].client = c;
 
-      /* Build handle url. Tolerate both  http://foo:999  and
-         http://foo:999/  forms */
-      char *slashbuildid;
-      if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
-        slashbuildid = "buildid";
-      else
-        slashbuildid = "/buildid";
-
       if (filename) /* must start with / */
 	{
 	  /* PR28034 escape characters in completed url to %hh format.  */
@@ -841,14 +878,12 @@ debuginfod_query_server (debuginfod_client *c,
 	      rc = -ENOMEM;
-	      goto out1;
+	      goto out2;
 	    }
-	  snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s%s", server_url,
-		   slashbuildid, build_id_bytes, type, escaped_string);
+	  snprintf(data[i].url, PATH_MAX, "%s/%s/%s/%s", server_url,
+		   build_id_bytes, type, escaped_string);
 	  curl_free(escaped_string);
 	}
       else
-	snprintf(data[i].url, PATH_MAX, "%s%s/%s/%s", server_url,
-		 slashbuildid, build_id_bytes, type);
-
+	snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type);
       if (vfd >= 0)
 	dprintf (vfd, "url %d %s\n", i, data[i].url);
 
@@ -883,7 +918,6 @@ debuginfod_query_server (debuginfod_client *c,
       curl_easy_setopt(data[i].handle, CURLOPT_HTTPHEADER, c->headers);
 
       curl_multi_add_handle(curlm, data[i].handle);
-      server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
     }
 
   /* Query servers in parallel.  */
@@ -927,7 +961,7 @@ debuginfod_query_server (debuginfod_client *c,
 	    case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break;
 	    default: rc = -ENETUNREACH; break;
 	    }
-	  goto out1;
+	  goto out2;
 	}
 
       if (c->progressfn) /* inform/check progress callback */
@@ -1115,7 +1149,7 @@ debuginfod_query_server (debuginfod_client *c,
 	      goto query_in_parallel;
 	    }
 	  else
-	    goto out1;
+	    goto out2;
 	}
 
       if (vfd >= 0)
@@ -1145,7 +1179,7 @@ debuginfod_query_server (debuginfod_client *c,
   if (rc < 0)
     {
       rc = -errno;
-      goto out1;
+      goto out2;
       /* Perhaps we need not give up right away; could retry or something ... */
     }
 
@@ -1155,7 +1189,10 @@ debuginfod_query_server (debuginfod_client *c,
       curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
       curl_easy_cleanup (data[i].handle);
     }
-
+
+  for (int i = 0; i < num_urls; ++i)
+    free(server_url_list[i]);
+  free(server_url_list);
   free (data);
   free (server_urls);
 
@@ -1168,7 +1205,7 @@ debuginfod_query_server (debuginfod_client *c,
   goto out;
 
 /* error exits */
- out1:
+ out2:
   /* remove all handles from multi */
   for (int i = 0; i < num_urls; i++)
     {
@@ -1181,6 +1218,11 @@ debuginfod_query_server (debuginfod_client *c,
   (void) rmdir (target_cache_dir); /* nop if not empty */
   free(data);
 
+ out1:
+  for (int i = 0; i < num_urls; ++i)
+    free(server_url_list[i]);
+  free(server_url_list);
+
  out0:
   free (server_urls);
 
@@ -1213,7 +1255,7 @@ debuginfod_query_server (debuginfod_client *c,
   free (target_cache_dir);
   free (target_cache_path);
   free (target_cache_tmppath);
-  return rc; 
+  return rc;
 }
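A note on the error-path layout above (sketch only; the resource
names below are hypothetical stand-ins for the copied $DEBUGINFOD_URLS
string, the deduplicated url list, and the curl handles): the cleanup
labels unwind in reverse order of acquisition and fall through into
one another, so each failure site jumps to the deepest label whose
resources are live.  That is why the pre-existing goto out1 sites in
the transfer code become goto out2 once the new, earlier out1 label
is introduced.

  /* Sketch of the fall-through cleanup-label pattern; the three
     resources are hypothetical stand-ins, acquired in order and
     released in reverse order by cascading labels.  */
  #include <stdlib.h>

  static int
  query (int num_urls)
  {
    int rc = 0;

    char *server_urls = malloc (64);      /* first resource: freed at out0 */
    if (server_urls == NULL)
      return -1;

    char **url_list = calloc (num_urls, sizeof (char *)); /* freed at out1 */
    if (url_list == NULL)
      {
        rc = -1;
        goto out0;
      }

    int *handles = calloc (num_urls, sizeof (int));       /* freed at out2 */
    if (handles == NULL)
      {
        rc = -1;
        goto out1;
      }

    /* Pretend the parallel transfer failed part-way: all three
       resources are live, so the deepest label is the right target.  */
    if (num_urls == 0)
      {
        rc = -1;
        goto out2;
      }

    /* Success path releases everything and skips the labels.  */
    free (handles);
    free (url_list);
    free (server_urls);
    return 0;

   out2:
    free (handles);
   out1:
    free (url_list);
   out0:
    free (server_urls);
    return rc;
  }

  int
  main (void)
  {
    return query (3) != 0;
  }

Keeping the labels in acquisition order means each new resource needs
only one new label and one-line retargeting at the failure sites
below it.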
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 3be9ee48..dba750e4 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -30,6 +30,12 @@
 	* run-debuginfod-find.sh: Added test case for grooming the database
 	using regexes.
 
+2021-07-09  Noah Sanci
+
+	PR27983
+	* run-debuginfod-find.sh: Added a test to ensure that duplicate
+	urls are not queried.
+
 2021-07-08  Mark Wielaard
 
 	* Makefile.am (EXTRA_DIST): Fix typo testfile-largealign.bz2 was
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 947c1261..44e16242 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -364,6 +364,17 @@ rm -rf extracted
 
 wait_ready $PORT1 'found_sourcerefs_total{source=".rpm archive"}' $sourcefiles
 
+########################################################################
+# PR27983 ensure no duplicate urls are used when querying servers for files
+rm -rf $DEBUGINFOD_CACHE_PATH # clean it from previous tests
+env DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http://127.0.0.1:$PORT1 http:127.0.0.1:7999" \
+  LD_LIBRARY_PATH=$ldpath ${abs_top_builddir}/debuginfod/debuginfod-find -v executable $BUILDID2 > vlog4 2>&1 || true
+tempfiles vlog4
+if [ $( grep -c 'duplicate url: http://127.0.0.1:'$PORT1'.*' vlog4 ) -ne 2 ]; then
+  echo "Duplicated servers remain"
+  err
+fi
+########################################################################
 # Run a bank of queries against the debuginfod-rpms / debuginfod-debs test cases
 
 archive_test() {
-- 
2.31.1
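Usage note, not part of the patch: the same deduplication can be
observed through the public libdebuginfod API rather than
debuginfod-find.  In the sketch below the port and build-id are
placeholders; with the doubled $DEBUGINFOD_URLS the verbose fd should
report "duplicate url: ..., skipping" for the second copy and query
the server only once.

  /* Illustration only: watching the deduplication through the public
     libdebuginfod API, as the shell test does with debuginfod-find -v.
     The URLs and build-id are placeholders.
     Build with:  gcc demo.c -ldebuginfod  */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <elfutils/debuginfod.h>

  int
  main (void)
  {
    /* Deliberately doubled server URL, as in the test case above.  */
    setenv ("DEBUGINFOD_URLS",
            "http://127.0.0.1:8002 http://127.0.0.1:8002", 1);

    debuginfod_client *c = debuginfod_begin ();
    if (c == NULL)
      return 1;

    /* Route the client's diagnostics, including the
       "duplicate url: ..., skipping" message, to stderr.  */
    debuginfod_set_verbose_fd (c, STDERR_FILENO);

    char *path = NULL;
    /* A hex build-id may be passed with length 0; this one is a
       placeholder, so expect a lookup failure on a real server.  */
    int fd = debuginfod_find_executable
      (c, (const unsigned char *) "0123456789abcdef0123456789abcdef01234567",
       0, &path);

    if (fd >= 0)
      {
        printf ("found %s\n", path);
        free (path);
        close (fd);
      }
    else
      fprintf (stderr, "query failed: %s\n", strerror (-fd));

    debuginfod_end (c);
    return 0;
  }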