From f126b48bf1131070d80e063bfd296ddb69af8c9a Mon Sep 17 00:00:00 2001 From: Noah Date: Wed, 7 Jul 2021 14:40:10 -0400 Subject: [PATCH] debuginfod: PR27711 - Use -I/-X regexes during groom phase The debuginfod -I/-X regexes operate during traversal to identify those files in need of scanning. The regexes are not used during grooming. This means that if from run to run, the regex changes so that formerly indexed files are excluded from traversal, the data is still retained in the index. This is both good and bad. On one hand, if the underlying data is still available, grooming will preserve the data, and let clients ask for it. On the other hand, if the growing index size is a problem, and one wishes to age no-longer-regex-matching index data out, there is no way. Let's add a debuginfod flag to use regexes during grooming. Specifically, in groom(), where the stat() test exists, also check for regex matching as in scan_source_paths(). Treat failure of the regex the same way as though the file didn't exist. Signed-off-by: Noah Sanci --- debuginfod/ChangeLog | 8 ++++++++ debuginfod/debuginfod.cxx | 11 ++++++++-- doc/debuginfod.8 | 3 +++ tests/ChangeLog | 5 +++++ tests/run-debuginfod-find.sh | 39 ++++++++++++++++++++++++++++++------ 5 files changed, 58 insertions(+), 8 deletions(-) diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 286c910a..29d3e815 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,11 @@ +2021-07-01 Noah Sanci + + PR27711 + * debuginfod.cxx (options): Add --regex-groom, -r option. + (regex_groom): New static bool defaults to false. + (parse_opt): Handle 'r' option by setting regex_groom to true. + (groom): Introduce and use reg_include and reg_exclude. + 2021-06-03 Frank Ch. 
Eigler PR27863 diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 543044c6..4f7fd2d5 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -360,6 +360,7 @@ static const struct argp_option options[] = { "database", 'd', "FILE", 0, "Path to sqlite database.", 0 }, { "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 }, { "verbose", 'v', NULL, 0, "Increase verbosity.", 0 }, + { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0}, #define ARGP_KEY_FDCACHE_FDS 0x1001 { "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 }, #define ARGP_KEY_FDCACHE_MBS 0x1002 @@ -407,6 +408,7 @@ static map scan_archives; static vector extra_ddl; static regex_t file_include_regex; static regex_t file_exclude_regex; +static bool regex_groom = false; static bool traverse_logical; static long fdcache_fds; static long fdcache_mbs; @@ -527,6 +529,9 @@ parse_opt (int key, char *arg, if (rc != 0) argp_failure(state, 1, EINVAL, "regular expression"); break; + case 'r': + regex_groom = true; + break; case ARGP_KEY_FDCACHE_FDS: fdcache_fds = atol (arg); break; @@ -3249,8 +3254,11 @@ void groom() int64_t fileid = sqlite3_column_int64 (files, 1); const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: ""); struct stat s; + bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0); + bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0); + rc = stat(filename, &s); - if (rc < 0 || (mtime != (int64_t) s.st_mtime)) + if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) ) { if (verbose > 2) obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl; @@ -3261,7 +3269,6 @@ void groom() } else inc_metric("groomed_total", "decision", "fresh"); - if (sigusr1 != forced_rescan_count) // stop early if scan triggered break; } diff --git 
a/doc/debuginfod.8 b/doc/debuginfod.8 index 1ba42cf6..1adf703a 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -159,6 +159,9 @@ scan, independent of the rescan time (including if it was zero), interrupting a groom pass (if any). .TP +.B "\-r" +Apply the -I and -X regexes during groom cycles, so that files excluded by the regexes are removed from the index. These parameters are in addition to what normally qualifies a file for grooming, not a replacement. + .B "\-g SECONDS" "\-\-groom\-time=SECONDS" Set the groom time for the index database. This is the amount of time the grooming thread will wait after finishing a grooming pass before diff --git a/tests/ChangeLog b/tests/ChangeLog index d8fa97fa..346b9e6e 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,8 @@ +2021-07-01 Noah Sanci + PR27711 + * run-debuginfod-find.sh: Added test case for grooming the database + using regexes. + 2021-06-16 Frank Ch. Eigler * run-debuginfod-find.sh: Fix intermittent groom/stale failure, diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 456dc2f8..7f66c322 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -36,13 +36,14 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache PID1=0 PID2=0 PID3=0 +PID4=0 cleanup() { - if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi - if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi - if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi - + if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi + if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi + if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi + if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp* exit_cleanup } @@ -293,7 +294,8 @@ kill -USR1 $PID1 wait_ready $PORT1 'thread_work_total{role="traverse"}' 3 wait_ready $PORT1 'thread_work_pending{role="scan"}' 0 wait_ready $PORT1 'thread_busy{role="scan"}' 0 - +cp $DB $DB.backup 
+tempfiles $DB.backup # Rerun same tests for the prog2 binary filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v debuginfo $BUILDID2 2>vlog` cmp $filename F/prog2 @@ -705,4 +707,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/" filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c` cmp $filename ${local_dir}/main.c -exit 0 +######################################################################## +## PR27711 +# Test to ensure that the --include="^$" --exclude=".*" options remove all files from a database backup +while true; do + PORT3=`expr '(' $RANDOM % 1000 ')' + 9000` + ss -atn | fgrep ":$PORT3" || break +done +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/" ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0 --regex-groom --include="^$" --exclude=".*" -d $DB.backup > vlog$PORT3 2>&1 & +PID4=$! +wait_ready $PORT3 'ready' 1 +tempfiles vlog$PORT3 +errfiles vlog$PORT3 + +kill -USR2 $PID4 +wait_ready $PORT3 'thread_work_total{role="groom"}' 1 +wait_ready $PORT3 'groom{statistic="archive d/e"}' 0 +wait_ready $PORT3 'groom{statistic="archive sdef"}' 0 +wait_ready $PORT3 'groom{statistic="archive sref"}' 0 +wait_ready $PORT3 'groom{statistic="buildids"}' 0 +wait_ready $PORT3 'groom{statistic="file d/e"}' 0 +wait_ready $PORT3 'groom{statistic="file s"}' 0 +wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0 +wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0 + +kill $PID4 +exit 0; -- 2.31.1