public inbox for java-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Patch: rewrite File.toCanonicalPath() for GNU/Posix systems
@ 2006-04-03 12:32 Gary Benson
  2006-04-05 22:00 ` Tom Tromey
  2006-04-07 15:53 ` Patch: rewrite File.toCanonicalPath() take 2 Gary Benson
  0 siblings, 2 replies; 14+ messages in thread
From: Gary Benson @ 2006-04-03 12:32 UTC (permalink / raw)
  To: java-patches

[-- Attachment #1: Type: text/plain, Size: 788 bytes --]

Hi all,

In order for FilePermission checks to work properly it is necessary to
convert all paths to canonical form before comparing them.  I recently
fixed this in Classpath, but the fix relies on File.toCanonicalPath()
handling symbolic links as other JVMs.  GCJ's current implementation
has a number of drawbacks, which this patch fixes.

The diff of the method itself is not particularly legible, so I've
attached a copy of the new method as well.

Cheers,
Gary

2006-04-03  Gary Benson  <gbenson@redhat.com>

	Partial fix for PR classpath/24895
	java/io/natFilePosix.cc (getCanonicalPath): Rewrite.
	java/io/File.java (getCanonicalPath): Javadoc fix.
	configure.ac: Replace check for realpath with checks for
	lstat and readlink.
	configure: Rebuilt.
	include/config.h.in: Likewise.

[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 8763 bytes --]

Index: libjava/java/io/natFilePosix.cc
===================================================================
--- libjava/java/io/natFilePosix.cc	(revision 112529)
+++ libjava/java/io/natFilePosix.cc	(working copy)
@@ -104,91 +104,128 @@
 jstring
 java::io::File::getCanonicalPath (void)
 {
-  // We use `+2' here because we might need to use `.' for our special
-  // case.
-  char *buf = (char *) __builtin_alloca (JvGetStringUTFLength (path) + 2);
-  char buf2[MAXPATHLEN];
-  jsize total = JvGetStringUTFRegion (path, 0, path->length(), buf);
+  jstring path = getAbsolutePath ();
 
-  // Special case: treat "" the same as ".".
-  if (total == 0)
-    buf[total++] = '.';
+  int len = JvGetStringUTFLength (path);
+  if (len >= MAXPATHLEN)
+    throw new IOException (JvNewStringLatin1 ("Path too long"));
 
-  buf[total] = '\0';
+  char *src = (char *) _Jv_Malloc (MAXPATHLEN * 3);
+  char *dst = src + MAXPATHLEN;
+  char *tmp = dst + MAXPATHLEN;
 
-#ifdef HAVE_REALPATH
-  if (realpath (buf, buf2) == NULL)
+  JvGetStringUTFRegion (path, 0, path->length(), src);
+  src[len] = '\0';
+  int srci = 1;
+
+  dst[0] = '/';
+  int dsti = 1;
+
+  bool fschecks = true;
+
+  while (src[srci] != '\0')
     {
-      // If realpath failed, we have to come up with a canonical path
-      // anyway.  We do this with purely textual manipulation.
-      // FIXME: this isn't perfect.  You can construct a case where
-      // we get a different answer from the JDK:
-      // mkdir -p /tmp/a/b/c
-      // ln -s /tmp/a/b /tmp/a/z
-      // ... getCanonicalPath("/tmp/a/z/c/nosuchfile")
-      // We will give /tmp/a/z/c/nosuchfile, while the JDK will
-      // give /tmp/a/b/c/nosuchfile.
-      int out_idx;
-      if (buf[0] != '/')
+      // Skip slashes.
+      while (src[srci] == '/')
+	srci++;
+      int tmpi = srci;
+      // Find next slash.
+      while (src[srci] != '/' && src[srci] != '\0')
+	srci++;
+      if (srci == tmpi)
+	// We hit the end.
+	break;
+      len = srci - tmpi;
+
+      // Handle "." and "..".
+      if (len == 1 && src[tmpi] == '.')
+	continue;
+      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
 	{
-	  // Not absolute, so start with current directory.
-	  if (getcwd (buf2, sizeof (buf2)) == NULL)
-	    throw new IOException ();
-	  out_idx = strlen (buf2);
+	  if (dsti == 1)
+	    {
+	      // Unlike other JVMs we do not rewind past the root
+	      // directory.  I can't see any legitimate reason why you
+	      // would want this, and chopping off bits of path seems
+	      // like a sure-fire way to introduce vulnerabilities.
+	      _Jv_Free (src);
+	      throw new IOException (
+		JvNewStringLatin1 ("Too many up-level references"));
+	    }
+	  while (dsti > 1 && dst[dsti - 1] != '/')
+	    dsti--;
+	  if (dsti != 1)
+	    dsti--;
+	  // Reenable filesystem checking if disabled, as we might
+	  // have reversed over whatever caused the problem before.
+	  // At least one proprietary JVM has inconsistencies because
+	  // it does not do this.
+	  fschecks = true;
+	  continue;
 	}
-      else
+
+      // Handle real path components.
+      if (dsti + len + 1 >= MAXPATHLEN)
 	{
-	  buf2[0] = '/';
-	  out_idx = 1;
-	} 
-      int in_idx = 0;
-      while (buf[in_idx] != '\0')
+	  _Jv_Free (src);
+	  throw new IOException (JvNewStringLatin1 ("Path too long"));
+	}
+      int dsti_save = dsti;
+      if (dsti > 1)
+	dst[dsti++] = '/';
+      strncpy (&dst[dsti], &src[tmpi], len);
+      dsti += len;
+      if (fschecks == false)
+	continue;
+
+#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
+      struct stat sb;
+      dst[dsti] = '\0';
+      if (::lstat (dst, &sb) == 0)
 	{
-	  // Skip '/'s.
-	  while (buf[in_idx] == '/')
-	    ++in_idx;
-	  int elt_start = in_idx;
-	  // Find next '/' or end of path.
-	  while (buf[in_idx] != '\0' && buf[in_idx] != '/')
-	    ++in_idx;
-	  if (in_idx == elt_start)
+	  if (S_ISLNK (sb.st_mode))
 	    {
-	      // An empty component means we've reached the end.
-	      break;
+	      tmpi = ::readlink (dst, tmp, MAXPATHLEN);
+	      if (tmpi < 1 || tmpi == MAXPATHLEN)
+		{
+		  _Jv_Free (src);
+		  throw new IOException (JvNewStringLatin1 ("Path too long"));
+		}
+
+	      // Prepend the link's path to src.
+	      if (tmpi + strlen (&src[srci]) >= MAXPATHLEN)
+		{
+		  _Jv_Free (src);
+		  throw new IOException (JvNewStringLatin1 ("Path too long"));
+		}
+	      while (src[srci] != '\0')
+		tmp[tmpi++] = src[srci++];
+	      tmp[tmpi] = '\0';
+	      strcpy (src, tmp);
+	      srci = 0;
+
+	      // Either replace or append dst depending on whether the
+	      // link is relative or absolute.
+	      dsti = tmp[0] == '/' ? 1 : dsti_save;
 	    }
-	  int len = in_idx - elt_start;
-	  if (len == 1 && buf[in_idx] == '.')
-	    continue;
-	  if (len == 2 && buf[in_idx] == '.' && buf[in_idx + 1] == '.')
-	    {
-	      // Found ".." component, lop off last part from existing
-	      // buffer.
-	      --out_idx;
-	      while (out_idx > 0 && buf2[out_idx] != '/')
-		--out_idx;
-	      // Can't go up past "/".
-	      if (out_idx == 0)
-		++out_idx;
-	    }
-	  else
-	    {
-	      // Append a real path component to the output.
-	      if (out_idx > 1)
-		buf2[out_idx++] = '/';
-	      strncpy (&buf2[out_idx], &buf[elt_start], len);
-	      out_idx += len;
-	    }
 	}
-
-      buf2[out_idx] = '\0';
+      else
+	{
+	  // Something doesn't exist, or we don't have permission to
+	  // read it, or a previous path component is a directory, or
+	  // a symlink is looped.  Whatever, we can't check the
+	  // filesystem any more.
+	  fschecks = false;
+	}
+#endif // HAVE_LSTAT && HAVE_READLINK
     }
+  dst[dsti] = '\0';
 
   // FIXME: what encoding to assume for file names?  This affects many
   // calls.
-  return JvNewStringUTF (buf2);
-#else
-  return JvNewStringUTF (buf);
-#endif
+  path = JvNewStringUTF (dst);
+  _Jv_Free (src);
+  return path;
 }
 
 jboolean
Index: libjava/java/io/File.java
===================================================================
--- libjava/java/io/File.java	(revision 112529)
+++ libjava/java/io/File.java	(working copy)
@@ -508,9 +508,9 @@
   /**
    * This method returns a canonical representation of the pathname of
    * this file.  The actual form of the canonical representation is
-   * different.  On the GNU system, the canonical form differs from the
-   * absolute form in that all relative file references to "." and ".."
-   * are resolved and removed.
+   * system-dependent.  On the GNU system, conversion to canonical
+   * form involves the removal of redundant separators, references to
+   * "." and "..", and symbolic links.
    * <p>
    * Note that this method, unlike the other methods which return path
    * names, can throw an IOException.  This is because native method 
Index: libjava/configure.ac
===================================================================
--- libjava/configure.ac	(revision 112529)
+++ libjava/configure.ac	(working copy)
@@ -895,7 +895,7 @@
 else
    AC_CHECK_FUNCS([strerror ioctl select fstat open fsync sleep opendir \
                    gmtime_r localtime_r readdir_r getpwuid_r getcwd \
-		   access stat mkdir rename rmdir unlink realpath utime chmod \
+		   access stat lstat mkdir rename rmdir unlink utime chmod readlink \
 		   nl_langinfo setlocale \
 		   inet_pton uname inet_ntoa \
 		   fork execvp pipe sigaction ftruncate])
Index: libjava/configure
===================================================================
--- libjava/configure	(revision 112529)
+++ libjava/configure	(working copy)
@@ -9388,9 +9388,9 @@
 
 
 
 for ac_func in strerror ioctl select fstat open fsync sleep opendir \
                    gmtime_r localtime_r readdir_r getpwuid_r getcwd \
-		   access stat mkdir rename rmdir unlink realpath utime chmod \
+		   access stat lstat mkdir rename rmdir unlink utime chmod readlink \
 		   nl_langinfo setlocale \
 		   inet_pton uname inet_ntoa \
 		   fork execvp pipe sigaction ftruncate
Index: libjava/include/config.h.in
===================================================================
--- libjava/include/config.h.in	(revision 112529)
+++ libjava/include/config.h.in	(working copy)
@@ -172,6 +172,9 @@
 /* Define to 1 if you have the `localtime_r' function. */
 #undef HAVE_LOCALTIME_R
 
+/* Define to 1 if you have the `lstat' function. */
+#undef HAVE_LSTAT
+
 /* Define to 1 if you have the `memcpy' function. */
 #undef HAVE_MEMCPY
 
@@ -229,8 +232,8 @@
 /* Define to 1 if you have the `readdir_r' function. */
 #undef HAVE_READDIR_R
 
-/* Define to 1 if you have the `realpath' function. */
-#undef HAVE_REALPATH
+/* Define to 1 if you have the `readlink' function. */
+#undef HAVE_READLINK
 
 /* Define to 1 if you have the `rename' function. */
 #undef HAVE_RENAME

[-- Attachment #3: canon.cc --]
[-- Type: text/plain, Size: 3280 bytes --]

jstring
java::io::File::getCanonicalPath (void)
{
  jstring path = getAbsolutePath ();

  int len = JvGetStringUTFLength (path);
  if (len >= MAXPATHLEN)
    throw new IOException (JvNewStringLatin1 ("Path too long"));

  char *src = (char *) _Jv_Malloc (MAXPATHLEN * 3);
  char *dst = src + MAXPATHLEN;
  char *tmp = dst + MAXPATHLEN;

  JvGetStringUTFRegion (path, 0, path->length(), src);
  src[len] = '\0';
  int srci = 1;

  dst[0] = '/';
  int dsti = 1;

  bool fschecks = true;

  while (src[srci] != '\0')
    {
      // Skip slashes.
      while (src[srci] == '/')
	srci++;
      int tmpi = srci;
      // Find next slash.
      while (src[srci] != '/' && src[srci] != '\0')
	srci++;
      if (srci == tmpi)
	// We hit the end.
	break;
      len = srci - tmpi;

      // Handle "." and "..".
      if (len == 1 && src[tmpi] == '.')
	continue;
      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
	{
	  if (dsti == 1)
	    {
	      // Unlike other JVMs we do not rewind past the root
	      // directory.  I can't see any legitimate reason why you
	      // would want this, and chopping off bits of path seems
	      // like a sure-fire way to introduce vulnerabilities.
	      _Jv_Free (src);
	      throw new IOException (
		JvNewStringLatin1 ("Too many up-level references"));
	    }
	  while (dsti > 1 && dst[dsti - 1] != '/')
	    dsti--;
	  if (dsti != 1)
	    dsti--;
	  // Reenable filesystem checking if disabled, as we might
	  // have reversed over whatever caused the problem before.
	  // At least one proprietary JVM has inconsistencies because
	  // it does not do this.
	  fschecks = true;
	  continue;
	}

      // Handle real path components.
      if (dsti + len + 1 >= MAXPATHLEN)
	{
	  _Jv_Free (src);
	  throw new IOException (JvNewStringLatin1 ("Path too long"));
	}
      int dsti_save = dsti;
      if (dsti > 1)
	dst[dsti++] = '/';
      strncpy (&dst[dsti], &src[tmpi], len);
      dsti += len;
      if (fschecks == false)
	continue;

#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
      struct stat sb;
      dst[dsti] = '\0';
      if (::lstat (dst, &sb) == 0)
	{
	  if (S_ISLNK (sb.st_mode))
	    {
	      tmpi = ::readlink (dst, tmp, MAXPATHLEN);
	      if (tmpi < 1 || tmpi == MAXPATHLEN)
		{
		  _Jv_Free (src);
		  throw new IOException (JvNewStringLatin1 ("Path too long"));
		}

	      // Prepend the link's path to src.
	      if (tmpi + strlen (&src[srci]) >= MAXPATHLEN)
		{
		  _Jv_Free (src);
		  throw new IOException (JvNewStringLatin1 ("Path too long"));
		}
	      while (src[srci] != '\0')
		tmp[tmpi++] = src[srci++];
	      tmp[tmpi] = '\0';
	      strcpy (src, tmp);
	      srci = 0;

	      // Either replace or append dst depending on whether the
	      // link is relative or absolute.
	      dsti = tmp[0] == '/' ? 1 : dsti_save;
	    }
	}
      else
	{
	  // Something doesn't exist, or we don't have permission to
	  // read it, or a previous path component is a directory, or
	  // a symlink is looped.  Whatever, we can't check the
	  // filesystem any more.
	  fschecks = false;
	}
#endif // HAVE_LSTAT && HAVE_READLINK
    }
  dst[dsti] = '\0';

  // FIXME: what encoding to assume for file names?  This affects many
  // calls.
  path = JvNewStringUTF (dst);
  _Jv_Free (src);
  return path;
}

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2006-06-06 15:18 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2006-04-03 12:32 Patch: rewrite File.toCanonicalPath() for GNU/Posix systems Gary Benson
2006-04-05 22:00 ` Tom Tromey
2006-04-06 10:03   ` Gary Benson
2006-04-07 15:53 ` Patch: rewrite File.toCanonicalPath() take 2 Gary Benson
2006-04-12 10:32   ` Andrew Haley
2006-04-19 13:55     ` Patch: rewrite File.toCanonicalPath() take 3 Gary Benson
2006-05-12  8:16       ` PING: rewrite File.toCanonicalPath() Gary Benson
2006-05-31  8:21         ` Second ping: " Gary Benson
2006-06-01  7:01           ` Bryce McKinlay
2006-06-05 12:07             ` Gary Benson
2006-06-05 15:13               ` Andrew Haley
2006-06-06 15:18             ` Patch: FYI: " Gary Benson
2006-06-01  7:54           ` Second ping: " Bryce McKinlay
2006-06-01  8:12             ` Gary Benson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).