--- updatedb.orig	2022-02-05 09:37:55.000000000 -0800
+++ updatedb	2022-02-24 03:27:10.749175300 -0800
@@ -15,13 +15,20 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-# csh original by James Woods; sh conversion by David MacKenzie.
+# csh original by James Woods; sh conversion by David MacKenzie;
+# cleanup and enhancements by Dan Harkless.
 
 #exec 2> /tmp/updatedb-trace.txt
 #set -x
 
+ourname=`basename "$0"`  # don't verbosely report path to the script in errors
+
+stderr() {
+    echo "$ourname: $*" >&2
+}
+
 version='
-updatedb (GNU findutils) 4.9.0
+updatedb (GNU findutils) 4.9.0+patches
 Copyright (C) 1994-2022 Free Software Foundation, Inc.
 License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>
 This is free software: you are free to change and redistribute it.
@@ -47,11 +54,12 @@
 # (correctly) points to https://www.gnu.org/software/findutils/ instead
 # of the bug reporting page.
 usage="\
-Usage: $0 [--findoptions='-option1 -option2...']
+Usage: $ourname [--findoptions='-option1 -option2...']
        [--localpaths='dir1 dir2...'] [--netpaths='dir1 dir2...']
        [--prunepaths='dir1 dir2...'] [--prunefs='fs1 fs2...']
        [--output=dbfile] [--netuser=user] [--localuser=user]
-       [--dbformat] [--version] [--help]
+       [--dbformat=(LOCATE02|slocate)] [--keeptxt=(sort|both)]
+       [--version] [--help]
 
 Please see also the documentation at https://www.gnu.org/software/findutils/.
 Report (and track progress on fixing) bugs in the updatedb
@@ -61,31 +69,32 @@
 "
 
 changeto=/
-for arg
-do
-  # If we are unable to fork, the back-tick operator will
-  # fail (and the shell will emit an error message).  When
-  # this happens, we exit with error value 71 (EX_OSERR).
-  # Alternative candidate - 75, EX_TEMPFAIL.
-  opt=`echo $arg|sed 's/^\([^=]*\).*/\1/'` || exit 71
-  val=`echo $arg|sed 's/^[^=]*=\(.*\)/\1/'` || exit 71
-  case "$opt" in
-    --findoptions) FINDOPTIONS="$val" ;;
-    --localpaths) SEARCHPATHS="$val" ;;
-    --netpaths) NETPATHS="$val" ;;
-    --prunepaths) PRUNEPATHS="$val" ;;
-    --prunefs) PRUNEFS="$val" ;;
-    --output) LOCATE_DB="$val" ;;
-    --netuser) NETUSER="$val" ;;
-    --localuser) LOCALUSER="$val" ;;
-    --changecwd) changeto="$val" ;;
-    --dbformat) dbformat="$val" ;;
-    --version) fail=0; echo "$version" || fail=1; exit $fail ;;
-    --help) fail=0; echo "$usage" || fail=1; exit $fail ;;
-    *) echo "updatedb: invalid option $opt
-Try '$0 --help' for more information." >&2
-    exit 1 ;;
-  esac
+keeptxt=neither  # default; must precede option parsing so --keeptxt can override
+for arg; do
+    # If we are unable to fork, the back-tick operator will
+    # fail (and the shell will emit an error message).  When
+    # this happens, we exit with error value 71 (EX_OSERR).
+    # Alternative candidate - 75, EX_TEMPFAIL.
+    opt=`printf '%s\n' "$arg" | sed 's/^\([^=]*\).*/\1/'` || exit 71
+    val=`printf '%s\n' "$arg" | sed 's/^[^=]*=\(.*\)/\1/'` || exit 71
+    case "$opt" in
+        --findoptions) FINDOPTIONS="$val" ;;
+        --localpaths) SEARCHPATHS="$val" ;;
+        --netpaths) NETPATHS="$val" ;;
+        --prunepaths) PRUNEPATHS="$val" ;;
+        --prunefs) PRUNEFS="$val" ;;
+        --output) LOCATE_DB="$val" ;;
+        --netuser) NETUSER="$val" ;;
+        --localuser) LOCALUSER="$val" ;;
+        --changecwd) changeto="$val" ;;
+        --dbformat) dbformat="$val" ;;
+        --keeptxt) keeptxt="$val" ;;
+        --version) fail=0; echo "$version" || fail=1; exit $fail ;;
+        --help) fail=0; echo "$usage" || fail=1; exit $fail ;;
+        *) stderr "Invalid option \"$opt\"."
+           echo " Try '$ourname --help' for more information." >&2
+           exit 1 ;;
+    esac
 done
 
 frcode_options=""
@@ -100,13 +109,82 @@
         ;;
     *)
         # The "old" database format is no longer supported.
-        echo "Unsupported locate database format ${dbformat}: Supported formats are:" >&2
-        echo "LOCATE02, slocate" >&2
+        stderr "Unsupported locate database format \"$dbformat\"."
+        echo ' Supported formats are "LOCATE02" or "slocate".' >&2
         exit 1
 esac
 
+# The database file to build (overridable via commandline or environment var.).
+: ${LOCATE_DB=/var/locatedb}
+LOCATE_DB_DIR=`dirname "$LOCATE_DB"`
+
+# Prevent overlapping with ourselves.  Large filesystem collections can easily
+# take over 24 hours to complete, even on pretty speedy systems / hard drives.
+# Ideally this would go in /var/run on systems that have that, but this is OK.
+lockfile=$LOCATE_DB.running_updatedb_pid
+
+if [ -e "$lockfile" ]; then
+    stderr "Aborting since prior run's lockfile still exists:"
+    ls -lF "$lockfile" >&2
+    exit 1
+fi
+
+reported_lockfile_failure=0
+
+cleanup_on_exit_or_signal() {
+    rm -f "$LOCATE_DB.n"
+
+    if [ $reported_lockfile_failure -ne 1 ]; then
+        # We didn't already have a failure trying to initially create the
+        # lockfile, so we can assume the temporary .txt files are ours (not
+        # saved on a previous run with --keeptxt), and it's safe to
+        # (optionally) delete them.
+        if [ x"$keeptxt" = x"sort" ]; then
+            rm -f "$LOCATE_DB.txt"
+        elif [ x"$keeptxt" != x"both" ]; then
+            # TBD:  Report undefined values of --keeptxt?
+            rm -f "$LOCATE_DB.txt" "$LOCATE_DB.txt.sort"
+        fi
+    fi
 
-if true
+    if ! rm -f "$lockfile"; then
+        report_lockfile_failure "remove"
+    fi
+}
+
+report_lockfile_failure() {
+    if [ $reported_lockfile_failure -ne 1 ]; then
+        printf '%s' "$ourname: Failed to $1 lockfile $lockfile" >&2
+
+        if [ -e "$lockfile" ]; then
+            echo ":" >&2
+            ls -lF "$lockfile" >&2
+        else
+            echo " in dir:" >&2
+            ls -dlF "$LOCATE_DB_DIR" >&2
+        fi
+
+        reported_lockfile_failure=1
+    fi
+}
+
+# Now that we've checked for a previous lockfile above, it's safe to install
+# the cleanup handler.  It is run on exit only; the catchable, potentially
+# fatal signals are turned into an exit, so the script stops after cleaning up
+# instead of carrying on.  Signals are given by name since their numbers vary
+# between systems, and KILL is left out since it cannot be trapped.
+trap cleanup_on_exit_or_signal 0
+trap 'exit 1' HUP INT QUIT ILL ABRT BUS FPE SEGV PIPE TERM
+
+# Now that we've installed the signal handler, it's safe to create lockfile.
+if ! echo $$ > "$lockfile"; then
+    report_lockfile_failure "write to"
+    exit 1
+fi
+
+# Don't use NUL as a path separator, now that we write to a temporary text file
+# (may want to make that controllable with a commandline option in the future).
+if false
 then
     sort="/usr/bin/sort -z"
     print_option="-print0"
@@ -123,25 +201,24 @@
     id | cut -d'(' -f 1 | cut -d'=' -f2
 }
 
-# figure out if su supports the -s option
+# Figure out if su supports the -s option.
 select_shell() {
-    if su "$1" -s $SHELL -c false < /dev/null ; then
-	# No.
-	echo ""
+    if su "$1" -s $SHELL -c false < /dev/null; then
+        # No.
+        echo ""
     else
-	if su "$1" -s $SHELL -c true < /dev/null ; then
-	    # Yes.
-	    echo "-s $SHELL"
+        if su "$1" -s $SHELL -c true < /dev/null; then
+            # Yes.
+            echo "-s $SHELL"
         else
-	    # su is unconditionally failing.  We won't be able to
-	    # figure out what is wrong, so be conservative.
-	    echo ""
-	fi
+            # su is unconditionally failing.  We won't be able to
+            # figure out what is wrong, so be conservative.
+            echo ""
+        fi
     fi
 }
 
-
-# You can set these in the environment, or use command-line options,
+# You can set these in the environment, or use command-line options
 # to override their defaults:
 
 # Any global options for find?
@@ -156,10 +233,13 @@
 # Network (NFS, AFS, RFS, etc.) directories to put in the database.
 : ${NETPATHS=}
 
-# Directories to not put in the database, which would otherwise be.
+# Default list of directories (overridable with options) to be omitted from the
+# database.  Note that /dev and /proc need to be specified "redundantly" here,
+# since on Cygwin, they can't be detected based on filesystem type.
 : ${PRUNEPATHS="
 /afs
 /amd
+/dev
 /proc
 /sfs
 /tmp
@@ -167,32 +247,34 @@
 /var/tmp
 "}
 
-# Trailing slashes result in regex items that are never matched, which
-# is not what the user will expect.  Therefore we now reject such
-# constructs.
+# Trailing slashes result in regex items that are never matched, which is
+# not what the user will expect.  Therefore we now reject such constructs.
+# TBD:  Just remove any trailing slashes instead?
 for p in $PRUNEPATHS; do
     case "$p" in
-	/*/) echo "$0: $p: pruned paths should not contain trailing slashes" >&2
-	     exit 1
+        /*/) stderr "Prune path '$p' has a trailing slash, which isn't allowed."
+             exit 1
     esac
 done
 
-# The same, in the form of a regex that find can use.
+# Convert $PRUNEPATHS to a regex that find can use.  Note that to allow paths
+# containing spaces, the first -e changes '\ ' to '///' ('//' isn't used since
+# it's a semi-common artifact of path concatenation), and then the last -e
+# changes '///' back to ' ' (it doesn't need backslashing in the regex).
 test -z "$PRUNEREGEX" &&
-  PRUNEREGEX=`echo $PRUNEPATHS|sed -e 's,^,\\\(^,' -e 's, ,$\\\)\\\|\\\(^,g' -e 's,$,$\\\),'`
+  PRUNEREGEX=`echo $PRUNEPATHS | sed -e 's,\\\ ,///,g' -e 's,^,\\\(^,' -e 's, ,$\\\)\\\|\\\(^,g' -e 's,$,$\\\),' -e 's,///, ,g'`
 
-# The database file to build.
-: ${LOCATE_DB=/var/locatedb}
-
-# Directory to hold intermediate files.
+# Directory for sort (& possibly other executables) to hold intermediate files.
+# The script's own temporary files go in the same directory as the database,
+# since they aren't always temporary (--keeptxt or left-behind lockfile).
 if test -z "$TMPDIR"; then
-	if test -d /var/tmp; then
-	  : ${TMPDIR=/var/tmp}
-	elif test -d /usr/tmp; then
-	  : ${TMPDIR=/usr/tmp}
-	else
-	  : ${TMPDIR=/tmp}
-	fi
+    if test -d /var/tmp; then
+        : ${TMPDIR=/var/tmp}
+    elif test -d /usr/tmp; then
+        : ${TMPDIR=/usr/tmp}
+    else
+        : ${TMPDIR=/tmp}
+    fi
 fi
 export TMPDIR
 
@@ -200,14 +282,14 @@
 : ${NETUSER=daemon}
 
 # The directory containing the subprograms.
-if test -n "$LIBEXECDIR" ; then
+if test -n "$LIBEXECDIR"; then
     : LIBEXECDIR already set, do nothing
 else
     : ${LIBEXECDIR=/usr/libexec}
 fi
 
 # The directory containing find.
-if test -n "$BINDIR" ; then
+if test -n "$BINDIR"; then
     : BINDIR already set, do nothing
 else
     : ${BINDIR=/usr/bin}
@@ -217,42 +299,19 @@
 : ${find:=${BINDIR}/find}
 : ${frcode:=${LIBEXECDIR}/frcode}
 
-make_tempdir () {
-    # This implementation is adapted from the GNU Autoconf manual.
-    {
-        tmp=`
-    (umask 077 && mktemp -d "$TMPDIR/updatedbXXXXXX") 2>/dev/null
-  ` &&
-  test -n "$tmp" && test -d "$tmp"
-    } || {
-	# This method is less secure than mktemp -d, but it's a fallback.
-	#
-	# We use $$ as well as $RANDOM since $RANDOM may not be available.
-	# We also add a time-dependent suffix.  This is actually somewhat
-	# predictable, but then so is $$.  POSIX does not require date to
-	# support +%N.
-	ts=`date +%N%S || date +%S 2>/dev/null`
-        tmp="$TMPDIR"/updatedb"$$"-"${RANDOM:-}${ts}"
-        (umask 077 && mkdir "$tmp")
-    }
-    echo "$tmp"
-}
-
-checkbinary () {
-    if test -x "$1" ; then
-	: ok
+checkbinary() {
+    if test -x "$1"; then
+        : ok
     else
-      eval echo "updatedb needs to be able to execute $1, but cannot." >&2
-      exit 1
+        stderr "We need to be able to execute $1, but cannot."
+        exit 1
     fi
 }
 
-for binary in $find $frcode
-do
-  checkbinary $binary
+for binary in $find $frcode; do
+    checkbinary $binary
 done
 
-
 : ${PRUNEFS="
 9P
 NFS
@@ -276,67 +335,93 @@
 "}
 
 if test -n "$PRUNEFS"; then
-prunefs_exp=`echo $PRUNEFS |sed -e 's/\([^ ][^ ]*\)/-o -fstype \1/g' \
- -e 's/-o //' -e 's/$/ -o/'`
+    prunefs_exp=`echo $PRUNEFS | sed -e 's/\([^ ][^ ]*\)/-o -fstype \1/g' \
+                                     -e 's/-o //' -e 's/$/ -o/'`
 else
-  prunefs_exp=''
+    prunefs_exp=''
 fi
 
 # Make and code the file list.
-# Sort case insensitively for users' convenience.
 
-rm -f $LOCATE_DB.n
-trap 'rm -f $LOCATE_DB.n; exit' HUP TERM
+if ! echo test > $LOCATE_DB.n; then
+    stderr "Failed to write to temporary database file $LOCATE_DB.n."
+    exit 1
+fi
 
-if {
-cd "$changeto"
-if test -n "$SEARCHPATHS"; then
-  if [ "$LOCALUSER" != "" ]; then
-    # : A1
-    su $LOCALUSER `select_shell $LOCALUSER` -c \
-    "$find $SEARCHPATHS $FINDOPTIONS \
-     \\( $prunefs_exp \
-     -type d -regex '$PRUNEREGEX' \\) -prune -o $print_option"
-  else
-    # : A2
-    $find $SEARCHPATHS $FINDOPTIONS \
-     \( $prunefs_exp \
-     -type d -regex "$PRUNEREGEX" \) -prune -o $print_option
-  fi
-fi
-
-if test -n "$NETPATHS"; then
-myuid=`getuid`
-if [ "$myuid" = 0 ]; then
-    # : A3
-    su $NETUSER `select_shell $NETUSER` -c \
-     "$find $NETPATHS $FINDOPTIONS \\( -type d -regex '$PRUNEREGEX' -prune \\) -o $print_option" ||
-    exit $?
-  else
-    # : A4
-    $find $NETPATHS $FINDOPTIONS \( -type d -regex "$PRUNEREGEX" -prune \) -o $print_option ||
-    exit $?
-  fi
+# We now write to a temporary text file instead of going direct over a pipe, as
+# the latter makes it very difficult to monitor progress and to debug failures.
+if ! echo test > $LOCATE_DB.txt; then
+    stderr "Failed to write to text list of files $LOCATE_DB.txt."
+    exit 1
 fi
-} | $sort | $frcode $frcode_options > $LOCATE_DB.n
+
+failed_return_value=0
+
+if {
+    cd "$changeto"
+    if test -n "$SEARCHPATHS"; then
+        if [ "$LOCALUSER" != "" ]; then
+            # : A1
+            su $LOCALUSER `select_shell $LOCALUSER` -c \
+                "$find $SEARCHPATHS $FINDOPTIONS \
+                 \\( $prunefs_exp \
+                 -type d -regex '$PRUNEREGEX' \\) -prune -o $print_option"
+        else
+            # : A2
+            $find $SEARCHPATHS $FINDOPTIONS \
+                \( $prunefs_exp \
+                -type d -regex "$PRUNEREGEX" \) -prune -o $print_option
+        fi
+    fi
+
+    if test -n "$NETPATHS"; then
+        myuid=`getuid`
+        if [ "$myuid" = 0 ]; then
+            # : A3
+            su $NETUSER `select_shell $NETUSER` -c \
+                "$find $NETPATHS $FINDOPTIONS \\( -type d -regex '$PRUNEREGEX' -prune \\) -o $print_option" ||
+                exit $?
+        else
+            # : A4
+            $find $NETPATHS $FINDOPTIONS \( -type d -regex "$PRUNEREGEX" \
+                -prune \) -o $print_option ||
+                exit $?
+        fi
+    fi
+} > $LOCATE_DB.txt
 then
-    : OK so far
-    true
+    # OK, find completed.  Going through all the files is very time-consuming
+    # on some systems, so (try to) save a copy of the previous DB in case
+    # something goes wrong at this point.
+    if [ -f $LOCATE_DB ]; then
+        cp -fp $LOCATE_DB $LOCATE_DB.prev
+    fi
+
+    # Now sort results, case-insensitively for user convenience, then generate
+    # the new DB.  The status is captured in the branch where $? still holds
+    # the failed command's own exit code (after "if ! cmd", $? would be 0).
+    if $sort -f $LOCATE_DB.txt > $LOCATE_DB.txt.sort; then
+        $frcode $frcode_options < $LOCATE_DB.txt.sort > $LOCATE_DB.n ||
+            failed_return_value=$?
+    else
+        failed_return_value=$?
+    fi
 else
-    rv=$?
-    echo "Failed to generate $LOCATE_DB.n" >&2
+    failed_return_value=$?
+fi
+
+if [ $failed_return_value -ne 0 ]; then
+    stderr "Failed to generate new database temp file $LOCATE_DB.n."
     rm -f $LOCATE_DB.n
-    exit $rv
+    exit $failed_return_value
 fi
 
-# To avoid breaking locate while this script is running, put the
+# To avoid breaking locate while this script is running, we put the
 # results in a temp file, then rename it atomically.
 if test -s $LOCATE_DB.n; then
-  chmod 644 ${LOCATE_DB}.n
-  mv ${LOCATE_DB}.n $LOCATE_DB
+    chmod 644 $LOCATE_DB.n
+    mv -f $LOCATE_DB.n $LOCATE_DB
 else
-  echo "updatedb: new database would be empty" >&2
-  rm -f $LOCATE_DB.n
+    stderr "New database would be empty, so not creating it."
+    rm -f $LOCATE_DB.n
 fi
-
-exit 0