public inbox for java-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Gary Benson <gbenson@redhat.com>
To: java-patches@gcc.gnu.org
Subject: Second ping: rewrite File.toCanonicalPath()
Date: Wed, 31 May 2006 08:21:00 -0000	[thread overview]
Message-ID: <20060531082025.GA23651@redhat.com> (raw)
In-Reply-To: <20060512081444.GB5188@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 519 bytes --]

Hi all,

A couple of months ago I posted a patch that rewrote File.toCanonicalPath()
on GNU/Posix systems in order for FilePermission checks to work properly,
so much of the security framework relies on it.  I rewrote it a couple of
times at various people's requests, but this (final?) version met with no
response whatsoever.

Can someone please approve it or tell me what's wrong with it?

As before the diff of the method itself is not very legible so I've
attached a copy of it as well as the patch.

Cheers,
Gary

[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 8850 bytes --]

Index: libjava/java/io/natFilePosix.cc
===================================================================
--- libjava/java/io/natFilePosix.cc	(revision 112529)
+++ libjava/java/io/natFilePosix.cc	(working copy)
@@ -101,94 +101,144 @@
 #endif
 }
 
+#define CHUNKLOG 8
+#define CHUNKSIZ (1 << CHUNKLOG)
+
+static int
+nextChunkSize (int size)
+{
+  return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG;
+}
+
+static char *
+maybeGrowBuf (char *buf, int *size, int required)
+{
+  if (required > *size)
+    {
+      *size = nextChunkSize (required);
+      buf = (char *) _Jv_Realloc (buf, *size);
+    }
+  return buf;
+}
+
 jstring
 java::io::File::getCanonicalPath (void)
 {
-  // We use `+2' here because we might need to use `.' for our special
-  // case.
-  char *buf = (char *) __builtin_alloca (JvGetStringUTFLength (path) + 2);
-  char buf2[MAXPATHLEN];
-  jsize total = JvGetStringUTFRegion (path, 0, path->length(), buf);
+  jstring path = getAbsolutePath ();
 
-  // Special case: treat "" the same as ".".
-  if (total == 0)
-    buf[total++] = '.';
+  int len = JvGetStringUTFLength (path);
+  int srcl = nextChunkSize (len + 1);
+  char *src = (char *) _Jv_Malloc (srcl);
+  JvGetStringUTFRegion (path, 0, path->length(), src);
+  src[len] = '\0';
+  int srci = 1;
 
-  buf[total] = '\0';
+  int dstl = nextChunkSize (2);  
+  char *dst = (char *) _Jv_Malloc (dstl);
+  dst[0] = '/';
+  int dsti = 1;
 
-#ifdef HAVE_REALPATH
-  if (realpath (buf, buf2) == NULL)
+  bool fschecks = true;
+
+  while (src[srci] != '\0')
     {
-      // If realpath failed, we have to come up with a canonical path
-      // anyway.  We do this with purely textual manipulation.
-      // FIXME: this isn't perfect.  You can construct a case where
-      // we get a different answer from the JDK:
-      // mkdir -p /tmp/a/b/c
-      // ln -s /tmp/a/b /tmp/a/z
-      // ... getCanonicalPath("/tmp/a/z/c/nosuchfile")
-      // We will give /tmp/a/z/c/nosuchfile, while the JDK will
-      // give /tmp/a/b/c/nosuchfile.
-      int out_idx;
-      if (buf[0] != '/')
+      // Skip slashes.
+      while (src[srci] == '/')
+	srci++;
+      int tmpi = srci;
+      // Find next slash.
+      while (src[srci] != '/' && src[srci] != '\0')
+	srci++;
+      if (srci == tmpi)
+	// We hit the end.
+	break;
+      len = srci - tmpi;
+
+      // Handle "." and "..".
+      if (len == 1 && src[tmpi] == '.')
+	continue;
+      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
 	{
-	  // Not absolute, so start with current directory.
-	  if (getcwd (buf2, sizeof (buf2)) == NULL)
-	    throw new IOException ();
-	  out_idx = strlen (buf2);
+	  while (dsti > 1 && dst[dsti - 1] != '/')
+	    dsti--;
+	  if (dsti != 1)
+	    dsti--;
+	  // Reenable filesystem checking if disabled, as we might
+	  // have reversed over whatever caused the problem before.
+	  // At least one proprietary JVM has inconsistencies because
+	  // it does not do this.
+	  fschecks = true;
+	  continue;
 	}
-      else
+
+      // Handle real path components.
+      dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1);
+      int dsti_save = dsti;
+      if (dsti > 1)
+	dst[dsti++] = '/';
+      strncpy (&dst[dsti], &src[tmpi], len);
+      dsti += len;
+      if (fschecks == false)
+	continue;
+
+#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
+      struct stat sb;
+      dst[dsti] = '\0';
+      if (::lstat (dst, &sb) == 0)
 	{
-	  buf2[0] = '/';
-	  out_idx = 1;
-	} 
-      int in_idx = 0;
-      while (buf[in_idx] != '\0')
-	{
-	  // Skip '/'s.
-	  while (buf[in_idx] == '/')
-	    ++in_idx;
-	  int elt_start = in_idx;
-	  // Find next '/' or end of path.
-	  while (buf[in_idx] != '\0' && buf[in_idx] != '/')
-	    ++in_idx;
-	  if (in_idx == elt_start)
+	  if (S_ISLNK (sb.st_mode))
 	    {
-	      // An empty component means we've reached the end.
-	      break;
+	      int tmpl = CHUNKSIZ;
+	      char *tmp = (char *) _Jv_Malloc (tmpl);
+
+	      while (1)
+		{
+		  tmpi = ::readlink (dst, tmp, tmpl);
+		  if (tmpi < 1)
+		    {
+		      _Jv_Free (src);
+		      _Jv_Free (dst);
+		      _Jv_Free (tmp);
+		      throw new IOException (
+			JvNewStringLatin1 ("readlink failed"));
+		    }
+		  if (tmpi < tmpl)
+		    break;
+		  tmpl += CHUNKSIZ;
+		  tmp = (char *) _Jv_Realloc (tmp, tmpl);
+		}
+
+	      // Prepend the link's path to src.
+	      tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1);
+	      strcpy(&tmp[tmpi], &src[srci]);
+	      _Jv_Free (src);
+	      src = tmp;
+	      srcl = tmpl;
+	      srci = 0;
+
+	      // Either replace or append dst depending on whether the
+	      // link is relative or absolute.
+	      dsti = src[0] == '/' ? 1 : dsti_save;
 	    }
-	  int len = in_idx - elt_start;
-	  if (len == 1 && buf[in_idx] == '.')
-	    continue;
-	  if (len == 2 && buf[in_idx] == '.' && buf[in_idx + 1] == '.')
-	    {
-	      // Found ".." component, lop off last part from existing
-	      // buffer.
-	      --out_idx;
-	      while (out_idx > 0 && buf2[out_idx] != '/')
-		--out_idx;
-	      // Can't go up past "/".
-	      if (out_idx == 0)
-		++out_idx;
-	    }
-	  else
-	    {
-	      // Append a real path component to the output.
-	      if (out_idx > 1)
-		buf2[out_idx++] = '/';
-	      strncpy (&buf2[out_idx], &buf[elt_start], len);
-	      out_idx += len;
-	    }
 	}
-
-      buf2[out_idx] = '\0';
+      else
+	{
+	  // Something doesn't exist, or we don't have permission to
+	  // read it, or a previous path component is a directory, or
+	  // a symlink is looped.  Whatever, we can't check the
+	  // filesystem any more.
+	  fschecks = false;
+	}
+#endif // HAVE_LSTAT && HAVE_READLINK
     }
+  dst[dsti] = '\0';
 
   // FIXME: what encoding to assume for file names?  This affects many
   // calls.
-  return JvNewStringUTF (buf2);
-#else
-  return JvNewStringUTF (buf);
-#endif
+  path = JvNewStringUTF (dst);
+  _Jv_Free (src);
+  _Jv_Free (dst);
+  return path;
 }
 
 jboolean
Index: libjava/java/io/File.java
===================================================================
--- libjava/java/io/File.java	(revision 112529)
+++ libjava/java/io/File.java	(working copy)
@@ -508,9 +508,9 @@
   /**
    * This method returns a canonical representation of the pathname of
    * this file.  The actual form of the canonical representation is
-   * different.  On the GNU system, the canonical form differs from the
-   * absolute form in that all relative file references to "." and ".."
-   * are resolved and removed.
+   * system-dependent.  On the GNU system, conversion to canonical
+   * form involves the removal of redundant separators, references to
+   * "." and "..", and symbolic links.
    * <p>
    * Note that this method, unlike the other methods which return path
    * names, can throw an IOException.  This is because native method 
Index: libjava/configure.ac
===================================================================
--- libjava/configure.ac	(revision 112529)
+++ libjava/configure.ac	(working copy)
@@ -895,7 +895,7 @@
 else
    AC_CHECK_FUNCS([strerror ioctl select fstat open fsync sleep opendir \
                    gmtime_r localtime_r readdir_r getpwuid_r getcwd \
-		   access stat mkdir rename rmdir unlink realpath utime chmod \
+		   access stat lstat mkdir rename rmdir unlink utime chmod readlink \
 		   nl_langinfo setlocale \
 		   inet_pton uname inet_ntoa \
 		   fork execvp pipe sigaction ftruncate])
Index: libjava/configure
===================================================================
--- libjava/configure	(revision 112529)
+++ libjava/configure	(working copy)
@@ -9388,9 +9388,10 @@
 
 
 
+
 for ac_func in strerror ioctl select fstat open fsync sleep opendir \
                    gmtime_r localtime_r readdir_r getpwuid_r getcwd \
-		   access stat mkdir rename rmdir unlink realpath utime chmod \
+		   access stat lstat mkdir rename rmdir unlink utime chmod readlink \
 		   nl_langinfo setlocale \
 		   inet_pton uname inet_ntoa \
 		   fork execvp pipe sigaction ftruncate
Index: libjava/include/config.h.in
===================================================================
--- libjava/include/config.h.in	(revision 112529)
+++ libjava/include/config.h.in	(working copy)
@@ -172,6 +172,9 @@
 /* Define to 1 if you have the `localtime_r' function. */
 #undef HAVE_LOCALTIME_R
 
+/* Define to 1 if you have the `lstat' function. */
+#undef HAVE_LSTAT
+
 /* Define to 1 if you have the `memcpy' function. */
 #undef HAVE_MEMCPY
 
@@ -229,8 +232,8 @@
 /* Define to 1 if you have the `readdir_r' function. */
 #undef HAVE_READDIR_R
 
-/* Define to 1 if you have the `realpath' function. */
-#undef HAVE_REALPATH
+/* Define to 1 if you have the `readlink' function. */
+#undef HAVE_READLINK
 
 /* Define to 1 if you have the `rename' function. */
 #undef HAVE_RENAME

[-- Attachment #3: canon.cc --]
[-- Type: text/plain, Size: 3334 bytes --]

#define CHUNKLOG 8
#define CHUNKSIZ (1 << CHUNKLOG)

static int
nextChunkSize (int size)
{
  return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG;
}

static char *
maybeGrowBuf (char *buf, int *size, int required)
{
  if (required > *size)
    {
      *size = nextChunkSize (required);
      buf = (char *) _Jv_Realloc (buf, *size);
    }
  return buf;
}

jstring
java::io::File::getCanonicalPath (void)
{
  jstring path = getAbsolutePath ();

  int len = JvGetStringUTFLength (path);
  int srcl = nextChunkSize (len + 1);
  char *src = (char *) _Jv_Malloc (srcl);
  JvGetStringUTFRegion (path, 0, path->length(), src);
  src[len] = '\0';
  int srci = 1;

  int dstl = nextChunkSize (2);  
  char *dst = (char *) _Jv_Malloc (dstl);
  dst[0] = '/';
  int dsti = 1;

  bool fschecks = true;

  while (src[srci] != '\0')
    {
      // Skip slashes.
      while (src[srci] == '/')
	srci++;
      int tmpi = srci;
      // Find next slash.
      while (src[srci] != '/' && src[srci] != '\0')
	srci++;
      if (srci == tmpi)
	// We hit the end.
	break;
      len = srci - tmpi;

      // Handle "." and "..".
      if (len == 1 && src[tmpi] == '.')
	continue;
      if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
	{
	  while (dsti > 1 && dst[dsti - 1] != '/')
	    dsti--;
	  if (dsti != 1)
	    dsti--;
	  // Reenable filesystem checking if disabled, as we might
	  // have reversed over whatever caused the problem before.
	  // At least one proprietary JVM has inconsistencies because
	  // it does not do this.
	  fschecks = true;
	  continue;
	}

      // Handle real path components.
      dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1);
      int dsti_save = dsti;
      if (dsti > 1)
	dst[dsti++] = '/';
      strncpy (&dst[dsti], &src[tmpi], len);
      dsti += len;
      if (fschecks == false)
	continue;

#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
      struct stat sb;
      dst[dsti] = '\0';
      if (::lstat (dst, &sb) == 0)
	{
	  if (S_ISLNK (sb.st_mode))
	    {
	      int tmpl = CHUNKSIZ;
	      char *tmp = (char *) _Jv_Malloc (tmpl);

	      while (1)
		{
		  tmpi = ::readlink (dst, tmp, tmpl);
		  if (tmpi < 1)
		    {
		      _Jv_Free (src);
		      _Jv_Free (dst);
		      _Jv_Free (tmp);
		      throw new IOException (
			JvNewStringLatin1 ("readlink failed"));
		    }
		  if (tmpi < tmpl)
		    break;
		  tmpl += CHUNKSIZ;
		  tmp = (char *) _Jv_Realloc (tmp, tmpl);
		}

	      // Prepend the link's path to src.
	      tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1);
	      strcpy(&tmp[tmpi], &src[srci]);
	      _Jv_Free (src);
	      src = tmp;
	      srcl = tmpl;
	      srci = 0;

	      // Either replace or append dst depending on whether the
	      // link is relative or absolute.
	      dsti = src[0] == '/' ? 1 : dsti_save;
	    }
	}
      else
	{
	  // Something doesn't exist, or we don't have permission to
	  // read it, or a previous path component is a directory, or
	  // a symlink is looped.  Whatever, we can't check the
	  // filesystem any more.
	  fschecks = false;
	}
#endif // HAVE_LSTAT && HAVE_READLINK
    }
  dst[dsti] = '\0';

  // FIXME: what encoding to assume for file names?  This affects many
  // calls.
  path = JvNewStringUTF (dst);
  _Jv_Free (src);
  _Jv_Free (dst);
  return path;
}

  reply	other threads:[~2006-05-31  8:21 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-04-03 12:32 Patch: rewrite File.toCanonicalPath() for GNU/Posix systems Gary Benson
2006-04-05 22:00 ` Tom Tromey
2006-04-06 10:03   ` Gary Benson
2006-04-07 15:53 ` Patch: rewrite File.toCanonicalPath() take 2 Gary Benson
2006-04-12 10:32   ` Andrew Haley
2006-04-19 13:55     ` Patch: rewrite File.toCanonicalPath() take 3 Gary Benson
2006-05-12  8:16       ` PING: rewrite File.toCanonicalPath() Gary Benson
2006-05-31  8:21         ` Gary Benson [this message]
2006-06-01  7:01           ` Second ping: " Bryce McKinlay
2006-06-05 12:07             ` Gary Benson
2006-06-05 15:13               ` Andrew Haley
2006-06-06 15:18             ` Patch: FYI: " Gary Benson
2006-06-01  7:54           ` Second ping: " Bryce McKinlay
2006-06-01  8:12             ` Gary Benson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060531082025.GA23651@redhat.com \
    --to=gbenson@redhat.com \
    --cc=java-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).