public inbox for cygwin-apps@cygwin.com
 help / color / mirror / Atom feed
From: Jon Turney <jon.turney@dronecode.org.uk>
To: cygwin-apps@cygwin.com
Cc: Jon Turney <jon.turney@dronecode.org.uk>
Subject: [PATCH setup 2/2] Detect filename collisions between packages
Date: Sun, 23 Apr 2023 15:43:30 +0100	[thread overview]
Message-ID: <20230423144330.3107-3-jon.turney@dronecode.org.uk> (raw)
In-Reply-To: <20230423144330.3107-1-jon.turney@dronecode.org.uk>

Detect filename collisions between packages
Don't check filenames under /etc/postinstall/ for collisions
Report when filename collisions exist
Add option '--collisions' to enable

Notes:

Reading file catalog from a package is moderately expensive in terms of
I/O: To extract all the filenames from a tar archive, we need to seek to
every file header, and to seek forward through a compressed file, we
must examine every intervening byte to decompress it.

This adds a fourth(!) pass through each archive (one to checksum it, one
to extract files, another one (I added in dbfd1a64 without thinking too
deeply about it) to extract symlinks), and now one to check for filename
collisions).

Using std::set_intersection() on values from std::map() here is probably
a mistake. It's simple to write, but the performance is not good.
---
 filemanifest.h |  29 ++++++
 install.cc     | 254 +++++++++++++++++++++++++++++++++++++++++--------
 res/en/res.rc  |   3 +
 resource.h     |   3 +
 4 files changed, 247 insertions(+), 42 deletions(-)
 create mode 100644 filemanifest.h

diff --git a/filemanifest.h b/filemanifest.h
new file mode 100644
index 0000000..cc6fb81
--- /dev/null
+++ b/filemanifest.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2022 Jon Turney
+ *
+ *     This program is free software; you can redistribute it and/or modify
+ *     it under the terms of the GNU General Public License as published by
+ *     the Free Software Foundation; either version 2 of the License, or
+ *     (at your option) any later version.
+ *
+ *     A copy of the GNU General Public License can be found at
+ *     http://www.gnu.org/
+ *
+ */
+
+/*
+  FileManifest
+
+  A serializable set of filenames
+  each filename is associated with the package name that owns it
+
+  Used when working with the install package file manifests in /etc/setup
+*/
+
+#include <string>
+#include <unordered_map>
+#include <algorithm>
+
+class FileManifest: public std::map<std::string, std::string>
+{
+};
diff --git a/install.cc b/install.cc
index 628dbd0..bcdbf8d 100644
--- a/install.cc
+++ b/install.cc
@@ -58,6 +58,7 @@
 #include "package_meta.h"
 #include "package_version.h"
 #include "package_source.h"
+#include "filemanifest.h"
 
 #include "threebar.h"
 #include "Exception.h"
@@ -71,6 +72,7 @@ static off_t package_bytes = 0;
 
 static BoolOption NoReplaceOnReboot (false, 'r', "no-replaceonreboot", IDS_HELPTEXT_NO_REPLACEONREBOOT);
 static BoolOption NoWriteRegistry (false, '\0', "no-write-registry", IDS_HELPTEXT_NO_WRITE_REGISTRY);
+static BoolOption Collisions (false, '\0', "collisions", IDS_HELPTEXT_CHECK_COLLISIONS);
 
 struct std_dirs_t {
   const char *name;
@@ -101,6 +103,32 @@ private:
   const std::string stratum;
 };
 
+// check if this is a collidable filename (i.e. a filename we should check for collisions)
+static bool
+collidableFilename(const std::string &fn)
+{
+  // ignore reserved paths (starting with '.')
+  if (fn[0] == '.')
+    return false;
+
+  // ignore directories
+  if (fn.back() == '/')
+    return false;
+
+  // ignore scripts under /etc/postinstall
+  // XXX: only should apply to non-perpetual scripts
+  //
+  // cygport allows multiple install packages made from the same source to
+  // contain the same post-install script, but this could still be a problem if we
+  // ever have two packages with different scripts with the same name...
+  //
+  // XXX: possibly use isAScript, but that could be written more efficiently...
+  if (fn.rfind("etc/postinstall/", 0) == 0)
+    return false;
+
+  return true;
+}
+
 class Installer
 {
   public:
@@ -116,15 +144,17 @@ class Installer
     void installOne (packagemeta &pkg, const packageversion &ver,
                      packagesource &source,
                      const std::string& , const std::string&, HWND );
+    void manifestOne(packagesource &source, const std::string& name, FileManifest &manifest);
     int errors;
   private:
     bool extract_replace_on_reboot(archive *, const std::string&,
                                    const std::string&, std::string);
+
+    archive *_openOne(packagesource &source);
     bool _installOne (packagemeta &pkgm,
                       const std::string& prefixURL,
                       const std::string& prefixPath,
                       HWND owner,
-                      io_stream *pkgfile,
                       archive *tarstream,
                       io_stream *lst,
                       bool symlink_phase);
@@ -210,6 +240,38 @@ Installer::preremoveOne (packagemeta & pkg)
     try_run_script ("/etc/preremove/", pkg.name, exts[i]);
 }
 
+// utility class for reading an installed package's file manifest
+class FileManifestReader
+{
+public:
+  FileManifestReader(std::string pkgname)
+  {
+    listfile = io_stream::open ("cygfile:///etc/setup/" + pkgname + ".lst.gz", "rb", 0);
+    listdata = compress::decompress (listfile);
+  }
+
+  ~FileManifestReader()
+  {
+    delete listdata;
+  }
+
+  bool readline(std::string &line)
+  {
+    line.reserve(CYG_PATH_MAX);
+    const char *sz = listdata->gets (&line[0], CYG_PATH_MAX);
+
+    // NULL indicates EOF
+    if (sz == NULL)
+      return false;
+
+    return true;
+  }
+
+private:
+  io_stream *listfile;
+  io_stream *listdata;
+};
+
 void
 Installer::uninstallOne (packagemeta & pkg)
 {
@@ -222,18 +284,10 @@ Installer::uninstallOne (packagemeta & pkg)
 
   std::set<std::string> dirs;
 
-  io_stream *listfile = io_stream::open ("cygfile:///etc/setup/" + pkg.name + ".lst.gz", "rb", 0);
-  io_stream *listdata = compress::decompress (listfile);
-
-  while (listdata)
+  FileManifestReader reader(pkg.name);
+  std::string line;
+  while (reader.readline(line))
     {
-      char getfilenamebuffer[CYG_PATH_MAX];
-      const char *sz = listdata->gets (getfilenamebuffer, sizeof (getfilenamebuffer));
-      if (sz == NULL)
-        break;
-
-      std::string line(sz);
-
       /* Insert the paths of all parent directories of line into dirs. */
       size_t idx = line.length();
       while ((idx = line.find_last_of('/', idx-1)) != std::string::npos)
@@ -271,7 +325,6 @@ Installer::uninstallOne (packagemeta & pkg)
     }
 
   /* Remove the listing file */
-  delete listdata;
   io_stream::remove ("cygfile:///etc/setup/" + pkg.name + ".lst.gz");
 
   /* An STL set maintains itself in sorted order. Thus, iterating over it
@@ -432,19 +485,13 @@ Installer::extract_replace_on_reboot (archive *tarstream, const std::string& pre
 }
 
 static char all_null[512];
+#define EMPTY_ARCHIVE (archive *)-1
 
-/* install one source at a given prefix. */
-void
-Installer::installOne (packagemeta &pkgm, const packageversion &ver,
-                       packagesource &source,
-                       const std::string& prefixURL,
-                       const std::string& prefixPath,
-                       HWND owner)
+archive *
+Installer::_openOne(packagesource &source)
 {
   if (!source.Canonical())
-    return;
-  Progress.SetText1 (IDS_PROGRESS_INSTALL);
-  Progress.SetText2 ((pkgm.name + "-" + ver.Canonical_version()).c_str());
+    return NULL;
 
   io_stream *pkgfile = NULL;
 
@@ -452,7 +499,7 @@ Installer::installOne (packagemeta &pkgm, const packageversion &ver,
     {
       note (NULL, IDS_ERR_OPEN_READ, source.Canonical (), "Unknown filename");
       ++errors;
-      return;
+      return NULL;
     }
 
   if (!io_stream::exists (source.Cached ())
@@ -460,10 +507,9 @@ Installer::installOne (packagemeta &pkgm, const packageversion &ver,
     {
       note (NULL, IDS_ERR_OPEN_READ, source.Cached (), "No such file");
       ++errors;
-      return;
+      return NULL;
     }
 
-
   /* At this point pkgfile is an opened io_stream to either a .tar.bz2 file,
      a .tar.gz file, a .tar.lzma file, or just a .tar file.  Try it first as
      a compressed file and if that fails try opening it as a tar directly.
@@ -485,20 +531,20 @@ Installer::installOne (packagemeta &pkgm, const packageversion &ver,
         {
           /* Decompression succeeded but we couldn't grok it as a valid tar
              archive.  */
+          archive *result = NULL;
           char c[512];
-	  ssize_t len;
+          ssize_t len;
           if ((len = try_decompress->peek (c, 512)) < 0
-	      || !memcmp (c, all_null, len))
+              || !memcmp (c, all_null, len))
             /* In some cases, maintainers have uploaded bzipped
                0-byte files as dummy packages instead of empty tar files.
                This is common enough that we should not treat this as an
                error condition.
-	       Same goes for tar archives consisting of a big block of
-	       all zero bytes (the famous 46 bytes tar archives). */
-	    {
-	      if (ver.Type () == package_binary)
-		pkgm.installed = ver;
-	    }
+               Same goes for tar archives consisting of a big block of
+               all zero bytes (the famous 46 bytes tar archives). */
+            {
+              result = EMPTY_ARCHIVE;
+            }
           else
             {
               note (NULL, IDS_ERR_OPEN_READ, source.Cached (),
@@ -506,7 +552,7 @@ Installer::installOne (packagemeta &pkgm, const packageversion &ver,
               ++errors;
             }
           delete try_decompress;
-          return;
+          return result;
         }
     }
   else if ((tarstream = archive::extract (pkgfile)) == NULL)
@@ -516,6 +562,31 @@ Installer::installOne (packagemeta &pkgm, const packageversion &ver,
       note (NULL, IDS_ERR_OPEN_READ, source.Cached (),
             "Unrecognisable file format");
       ++errors;
+      return NULL;
+    }
+
+  return tarstream;
+}
+
+/* install one source at a given prefix. */
+void
+Installer::installOne (packagemeta &pkgm, const packageversion &ver,
+                       packagesource &source,
+                       const std::string& prefixURL,
+                       const std::string& prefixPath,
+                       HWND owner)
+{
+  Progress.SetText1 (IDS_PROGRESS_INSTALL);
+  Progress.SetText2 ((pkgm.name + "-" + ver.Canonical_version()).c_str());
+
+  archive *tarstream = _openOne(source);
+  if (!tarstream)
+    return;
+
+  if (tarstream == EMPTY_ARCHIVE)
+    {
+      if (ver.Type () == package_binary)
+        pkgm.installed = ver;
       return;
     }
 
@@ -549,12 +620,12 @@ Installer::installOne (packagemeta &pkgm, const packageversion &ver,
   Log (LOG_PLAIN) << "Extracting from " << source.Cached () << endLog;
 
   bool error_in_this_package = _installOne(pkgm, prefixURL, prefixPath, owner,
-                                           pkgfile, tarstream, lst, false);
+                                           tarstream, lst, false);
   if (tarstream->seek(0, IO_SEEK_SET) == -1)
     Log (LOG_PLAIN) << "Error rewinding to extract symlinks" << source.Cached () << endLog;
 
   error_in_this_package |= _installOne(pkgm, prefixURL, prefixPath, owner,
-                                       pkgfile, tarstream, lst, true);
+                                       tarstream, lst, true);
 
   if (lst)
     delete lst;
@@ -575,7 +646,6 @@ Installer::_installOne (packagemeta &pkgm,
                         const std::string& prefixURL,
                         const std::string& prefixPath,
                         HWND owner,
-                        io_stream *pkgfile,
                         archive *tarstream,
                         io_stream *lst,
                         bool symlink_phase)
@@ -764,13 +834,39 @@ Installer::_installOne (packagemeta &pkgm,
 
           break;
         }
-      progress (pkgfile->tell ());
+      // progress is reported against the length of underlying file who's total
+      // size is always known in advance (unlike the that of the tar archive, if
+      // it's compressed)
+      progress (tarstream->underlying()->tell ());
       num_installs++;
     }
 
   return error_in_this_package;
 }
 
+void
+Installer::manifestOne(packagesource &source, const std::string &name, FileManifest &manifest)
+{
+  archive *tarstream = _openOne(source);
+  if (!tarstream)
+    return;
+
+  if (tarstream == EMPTY_ARCHIVE)
+    return;
+
+  std::string fn;
+  while ((fn = tarstream->next_file_name ()).size ())
+    {
+      // should be ignored?
+      if (collidableFilename(fn))
+        manifest[fn] = name;
+
+      tarstream->skip_file();
+    }
+
+  delete tarstream;
+}
+
 static void
 check_for_old_cygwin (HWND owner)
 {
@@ -802,10 +898,72 @@ check_for_old_cygwin (HWND owner)
 }
 
 static void
-do_install_thread (HINSTANCE h, HWND owner)
+check_for_collisions (Installer &myInstaller, std::vector <packageversion> &install_q)
 {
-  int i;
+  /* Build file manifest for currently installed packages  ... */
+  Progress.SetText1(IDS_PROGRESS_CONFLICTS);
+  Progress.SetBar1(0);
+
+  FileManifest manifest;
+  for (packagedb::packagecollection::iterator i = packagedb::packages.begin ();
+       i != packagedb::packages.end ();
+       ++i)
+    {
+      Progress.SetBar2(std::distance(packagedb::packages.begin(), i), packagedb::packages.size());
+
+      packagemeta & pkgm = *(i->second);
+      /* installed and not getting uninstalled (perhaps to upgrade or reinstall) */
+      if (pkgm.installed && (pkgm.desired != pkgm.installed))
+        {
+          FileManifestReader reader(pkgm.name);
+
+          std::string line;
+          while (reader.readline(line))
+            {
+              if (collidableFilename(line))
+                manifest[line] = pkgm.name;
+            }
+        }
+    }
+
+  /* ... then check for conflicts with each package to install ... */
+  for (std::vector <packageversion>::iterator i = install_q.begin ();
+       i != install_q.end (); ++i)
+  {
+    Progress.SetBar2(std::distance(install_q.begin(), i), install_q.size());
+
+    packageversion & pkg = *i;
+    FileManifest newmanifest;
+    myInstaller.manifestOne(*pkg.source(), i->Name(), newmanifest);
+
+    FileManifest intersect;
+    std::set_intersection(manifest.begin(), manifest.end(),
+                          newmanifest.begin(), newmanifest.end(),
+                          std::inserter(intersect, intersect.begin()),
+                          manifest.value_comp());
+
+    if (!intersect.empty())
+      {
+        Log (LOG_PLAIN) << "package " << i->Name() << " has " << intersect.size() << " conflicts" << endLog;
+        for (FileManifest::iterator j = intersect.begin();
+             j != intersect.end();
+             j++)
+          {
+            Log (LOG_PLAIN) << "packages " << i->Name() << " and " << j->second << " both contain file /" << j->first << endLog;
+          }
+        ++myInstaller.errors;
+      }
+    else
+      {
+        /* ... adding each packages file manifest as we go */
+        manifest.insert(newmanifest.begin(), newmanifest.end());
+      }
+  }
+}
 
+static void
+do_install_thread (HINSTANCE h, HWND owner)
+{
   num_installs = 0, num_uninstalls = 0;
   rebootneeded = false;
 
@@ -813,7 +971,7 @@ do_install_thread (HINSTANCE h, HWND owner)
                        std::string("file://") + std::string(get_root_dir()),
 		       0755);
 
-  for (i = 0; Installer::StandardDirs[i].name; i++)
+  for (int i = 0; Installer::StandardDirs[i].name; i++)
   {
     std::string p = cygpath (Installer::StandardDirs[i].name);
     if (p.size())
@@ -907,6 +1065,16 @@ do_install_thread (HINSTANCE h, HWND owner)
       Progress.SetBar2 (md5sum_total_bytes_sofar, md5sum_total_bytes);
   }
 
+  if (Collisions)
+    {
+      check_for_collisions(myInstaller, install_q);
+
+      if (myInstaller.errors)
+        {
+          fatal (owner, IDS_CONFLICT);
+        }
+    }
+
   /* start with uninstalls - remove files that new packages may replace */
   Progress.SetBar2(0);
   myInstaller.preremovePerpetual ("0");
@@ -934,6 +1102,7 @@ do_install_thread (HINSTANCE h, HWND owner)
     Progress.SetBar2(std::distance(uninstall_q.begin(), i) + 1, uninstall_q.size());
   }
 
+  /* installs */
   for (std::vector <packageversion>::iterator i = install_q.begin ();
        i != install_q.end (); ++i)
   {
@@ -956,6 +1125,7 @@ do_install_thread (HINSTANCE h, HWND owner)
     }
   }
 
+  /* source package installs */
   for (std::vector <packageversion>::iterator i = sourceinstall_q.begin ();
        i != sourceinstall_q.end (); ++i)
   {
diff --git a/res/en/res.rc b/res/en/res.rc
index e2f04fd..e2e4418 100644
--- a/res/en/res.rc
+++ b/res/en/res.rc
@@ -649,6 +649,8 @@ BEGIN
     IDS_FILE_INUSE_KILL "&Kill Processes"
     IDS_FILE_INUSE_MSG "Unable to extract"
     IDS_USER_URL_TOOLTIP "Additional site URL, path or UNC path"
+    IDS_PROGRESS_CONFLICTS "Checking for filename conflicts..."
+    IDS_CONFLICT "Filename conflicts"
 END
 
 //
@@ -664,6 +666,7 @@ BEGIN
     IDS_HELPTEXT_ALLOW_UNSUPPORTED_WINDOWS "Allow old, unsupported Windows versions"
     IDS_HELPTEXT_ARCH "Architecture to install (x86_64 or x86)"
     IDS_HELPTEXT_CATEGORIES "Specify categories to install"
+    IDS_HELPTEXT_CHECK_COLLISIONS "Check for filename collisions between packages"
     IDS_HELPTEXT_COMPACTOS "Compress installed files with Compact OS (xpress4k, xpress8k, xpress16k, lzx)"
     IDS_HELPTEXT_DELETE_ORPHANS "Remove orphaned packages"
     IDS_HELPTEXT_DISABLE_ANTIVIRUS "Disable known or suspected buggy anti virus software packages during execution"
diff --git a/resource.h b/resource.h
index a08489c..72ff554 100644
--- a/resource.h
+++ b/resource.h
@@ -108,6 +108,8 @@
 #define IDS_VIEW_UNNEEDED                1212
 #define IDS_UNSUPPORTED_WINDOWS_ARCH     1213
 #define IDS_USER_URL_TOOLTIP             1214
+#define IDS_PROGRESS_CONFLICTS           1215
+#define IDS_CONFLICT                     1216
 
 #define IDS_HELPTEXT_COMPACTOS           1500
 #define IDS_HELPTEXT_PUBKEY              1501
@@ -158,6 +160,7 @@
 #define IDS_HELPTEXT_HEADER              1546
 #define IDS_HELPTEXT_FOOTER              1547
 #define IDS_HELPTEXT_NO_WRITE_REGISTRY   1548
+#define IDS_HELPTEXT_CHECK_COLLISIONS    1549
 
 // Dialogs
 
-- 
2.39.0


  parent reply	other threads:[~2023-04-23 14:44 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-23 14:43 [PATCH setup 0/2] " Jon Turney
2023-04-23 14:43 ` [PATCH setup 1/2] Add underlying() method to io_stream class Jon Turney
2023-04-23 14:43 ` Jon Turney [this message]
2023-04-24 16:26   ` [PATCH setup 2/2] Detect filename collisions between packages Christian Franke
2023-04-27 16:11     ` Jon Turney
2023-04-27 17:20       ` Christian Franke
2023-04-24 16:46 ` [PATCH setup 0/2] " Corinna Vinschen
2023-04-24 18:16   ` Achim Gratz
2023-04-27 16:11     ` Jon Turney
2023-04-28  5:51       ` Brian Inglis
2023-04-30 18:25         ` Jon Turney
2023-05-04  4:14           ` Brian Inglis

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230423144330.3107-3-jon.turney@dronecode.org.uk \
    --to=jon.turney@dronecode.org.uk \
    --cc=cygwin-apps@cygwin.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).