From: Gary Benson <gbenson@redhat.com>
To: java-patches@gcc.gnu.org
Subject: Patch: FYI: rewrite File.toCanonicalPath()
Date: Tue, 06 Jun 2006 15:18:00 -0000 [thread overview]
Message-ID: <20060606151701.GA3511@redhat.com> (raw)
In-Reply-To: <447E90CF.6070905@redhat.com>
[-- Attachment #1: Type: text/plain, Size: 450 bytes --]
Hi all,
I just checked in a rewrite of File.toCanonicalPath() for GNU/Posix
systems to make it correctly handle symbolic links as a partial fix
for PR 24895.
Cheers,
Gary
2006-06-06 Gary Benson <gbenson@redhat.com>
* java/io/natFilePosix.cc (getCanonicalPath): Rewritten.
* configure.ac: Remove realpath check and add checks for
lstat and readlink.
* configure: Rebuilt.
* include/config.h.in: Likewise.
* java/io/File.java: Javadoc fix.
[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 9944 bytes --]
Index: java/io/natFilePosix.cc
===================================================================
--- java/io/natFilePosix.cc (revision 114425)
+++ java/io/natFilePosix.cc (working copy)
@@ -101,94 +101,167 @@
#endif
}
+// These two methods are used to maintain dynamically allocated
+// buffers for getCanonicalPath without the overhead of calling
+// realloc every time a buffer is modified. Buffers are sized
+// at the smallest multiple of CHUNKSIZ that is greater than or
+// equal to the desired length. The default CHUNKSIZ is 256,
+// longer than most paths, so in most cases a getCanonicalPath
+// will require only one malloc per buffer.
+
+#define CHUNKLOG 8
+#define CHUNKSIZ (1 << CHUNKLOG)
+
+static int
+nextChunkSize (int size)
+{
+ return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG;
+}
+
+static char *
+maybeGrowBuf (char *buf, int *size, int required)
+{
+ if (required > *size)
+ {
+ *size = nextChunkSize (required);
+ buf = (char *) _Jv_Realloc (buf, *size);
+ }
+ return buf;
+}
+
+// Return a canonical representation of the pathname of this file. On
+// the GNU system this involves the removal of redundant separators,
+// references to "." and "..", and symbolic links.
+//
+// The conversion proceeds on a component-by-component basis: symbolic
+// links and references to ".." are resolved as and when they occur.
+// This means that if "/foo/bar" is a symbolic link to "/baz" then the
+// canonical form of "/foo/bar/.." is "/" and not "/foo".
+//
+// In order to mimic the behaviour of proprietary JVMs, non-existant
+// path components are allowed (a departure from the normal GNU system
+// convention). This means that if "/foo/bar" is a symbolic link to
+// "/baz", the canonical form of "/non-existant-directory/../foo/bar"
+// is "/baz".
+
jstring
java::io::File::getCanonicalPath (void)
{
- // We use `+2' here because we might need to use `.' for our special
- // case.
- char *buf = (char *) __builtin_alloca (JvGetStringUTFLength (path) + 2);
- char buf2[MAXPATHLEN];
- jsize total = JvGetStringUTFRegion (path, 0, path->length(), buf);
+ jstring path = getAbsolutePath ();
- // Special case: treat "" the same as ".".
- if (total == 0)
- buf[total++] = '.';
+ int len = JvGetStringUTFLength (path);
+ int srcl = nextChunkSize (len + 1);
+ char *src = (char *) _Jv_Malloc (srcl);
+ JvGetStringUTFRegion (path, 0, path->length(), src);
+ src[len] = '\0';
+ int srci = 1;
- buf[total] = '\0';
+ int dstl = nextChunkSize (2);
+ char *dst = (char *) _Jv_Malloc (dstl);
+ dst[0] = '/';
+ int dsti = 1;
-#ifdef HAVE_REALPATH
- if (realpath (buf, buf2) == NULL)
+ bool fschecks = true;
+
+ while (src[srci] != '\0')
{
- // If realpath failed, we have to come up with a canonical path
- // anyway. We do this with purely textual manipulation.
- // FIXME: this isn't perfect. You can construct a case where
- // we get a different answer from the JDK:
- // mkdir -p /tmp/a/b/c
- // ln -s /tmp/a/b /tmp/a/z
- // ... getCanonicalPath("/tmp/a/z/c/nosuchfile")
- // We will give /tmp/a/z/c/nosuchfile, while the JDK will
- // give /tmp/a/b/c/nosuchfile.
- int out_idx;
- if (buf[0] != '/')
+ // Skip slashes.
+ while (src[srci] == '/')
+ srci++;
+ int tmpi = srci;
+ // Find next slash.
+ while (src[srci] != '/' && src[srci] != '\0')
+ srci++;
+ if (srci == tmpi)
+ // We hit the end.
+ break;
+ len = srci - tmpi;
+
+ // Handle "." and "..".
+ if (len == 1 && src[tmpi] == '.')
+ continue;
+ if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
{
- // Not absolute, so start with current directory.
- if (getcwd (buf2, sizeof (buf2)) == NULL)
- throw new IOException ();
- out_idx = strlen (buf2);
+ while (dsti > 1 && dst[dsti - 1] != '/')
+ dsti--;
+ if (dsti != 1)
+ dsti--;
+ // Reenable filesystem checking if disabled, as we might
+ // have reversed over whatever caused the problem before.
+ // At least one proprietary JVM has inconsistencies because
+ // it does not do this.
+ fschecks = true;
+ continue;
}
- else
+
+ // Handle real path components.
+ dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1);
+ int dsti_save = dsti;
+ if (dsti > 1)
+ dst[dsti++] = '/';
+ strncpy (&dst[dsti], &src[tmpi], len);
+ dsti += len;
+ if (fschecks == false)
+ continue;
+
+#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
+ struct stat sb;
+ dst[dsti] = '\0';
+ if (::lstat (dst, &sb) == 0)
{
- buf2[0] = '/';
- out_idx = 1;
- }
- int in_idx = 0;
- while (buf[in_idx] != '\0')
- {
- // Skip '/'s.
- while (buf[in_idx] == '/')
- ++in_idx;
- int elt_start = in_idx;
- // Find next '/' or end of path.
- while (buf[in_idx] != '\0' && buf[in_idx] != '/')
- ++in_idx;
- if (in_idx == elt_start)
+ if (S_ISLNK (sb.st_mode))
{
- // An empty component means we've reached the end.
- break;
+ int tmpl = CHUNKSIZ;
+ char *tmp = (char *) _Jv_Malloc (tmpl);
+
+ while (1)
+ {
+ tmpi = ::readlink (dst, tmp, tmpl);
+ if (tmpi < 1)
+ {
+ _Jv_Free (src);
+ _Jv_Free (dst);
+ _Jv_Free (tmp);
+ throw new IOException (
+ JvNewStringLatin1 ("readlink failed"));
+ }
+ if (tmpi < tmpl)
+ break;
+ tmpl += CHUNKSIZ;
+ tmp = (char *) _Jv_Realloc (tmp, tmpl);
+ }
+
+ // Prepend the link's path to src.
+ tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1);
+ strcpy(&tmp[tmpi], &src[srci]);
+ _Jv_Free (src);
+ src = tmp;
+ srcl = tmpl;
+ srci = 0;
+
+ // Either replace or append dst depending on whether the
+ // link is relative or absolute.
+ dsti = src[0] == '/' ? 1 : dsti_save;
}
- int len = in_idx - elt_start;
- if (len == 1 && buf[in_idx] == '.')
- continue;
- if (len == 2 && buf[in_idx] == '.' && buf[in_idx + 1] == '.')
- {
- // Found ".." component, lop off last part from existing
- // buffer.
- --out_idx;
- while (out_idx > 0 && buf2[out_idx] != '/')
- --out_idx;
- // Can't go up past "/".
- if (out_idx == 0)
- ++out_idx;
- }
- else
- {
- // Append a real path component to the output.
- if (out_idx > 1)
- buf2[out_idx++] = '/';
- strncpy (&buf2[out_idx], &buf[elt_start], len);
- out_idx += len;
- }
}
-
- buf2[out_idx] = '\0';
+ else
+ {
+ // Something doesn't exist, or we don't have permission to
+ // read it, or a previous path component is a directory, or
+ // a symlink is looped. Whatever, we can't check the
+ // filesystem any more.
+ fschecks = false;
+ }
+#endif // HAVE_LSTAT && HAVE_READLINK
}
+ dst[dsti] = '\0';
// FIXME: what encoding to assume for file names? This affects many
// calls.
- return JvNewStringUTF (buf2);
-#else
- return JvNewStringUTF (buf);
-#endif
+ path = JvNewStringUTF (dst);
+ _Jv_Free (src);
+ _Jv_Free (dst);
+ return path;
}
jboolean
Index: configure.ac
===================================================================
--- configure.ac (revision 114425)
+++ configure.ac (working copy)
@@ -897,7 +897,7 @@
else
AC_CHECK_FUNCS([strerror ioctl select fstat open fsync sleep opendir \
gmtime_r localtime_r readdir_r getpwuid_r getcwd \
- access stat mkdir rename rmdir unlink realpath utime chmod \
+ access stat lstat mkdir rename rmdir unlink utime chmod readlink \
nl_langinfo setlocale \
inet_pton uname inet_ntoa \
fork execvp pipe sigaction ftruncate mmap])
Index: configure
===================================================================
--- configure (revision 114425)
+++ configure (working copy)
@@ -10396,7 +10396,7 @@
for ac_func in strerror ioctl select fstat open fsync sleep opendir \
gmtime_r localtime_r readdir_r getpwuid_r getcwd \
- access stat mkdir rename rmdir unlink realpath utime chmod \
+ access stat lstat mkdir rename rmdir unlink utime chmod readlink \
nl_langinfo setlocale \
inet_pton uname inet_ntoa \
fork execvp pipe sigaction ftruncate mmap
Index: include/config.h.in
===================================================================
--- include/config.h.in (revision 114425)
+++ include/config.h.in (working copy)
@@ -169,6 +169,9 @@
/* Define to 1 if you have the `localtime_r' function. */
#undef HAVE_LOCALTIME_R
+/* Define to 1 if you have the `lstat' function. */
+#undef HAVE_LSTAT
+
/* Define to 1 if you have the `memcpy' function. */
#undef HAVE_MEMCPY
@@ -226,8 +229,8 @@
/* Define to 1 if you have the `readdir_r' function. */
#undef HAVE_READDIR_R
-/* Define to 1 if you have the `realpath' function. */
-#undef HAVE_REALPATH
+/* Define to 1 if you have the `readlink' function. */
+#undef HAVE_READLINK
/* Define to 1 if you have the `rename' function. */
#undef HAVE_RENAME
Index: java/io/File.java
===================================================================
--- java/io/File.java (revision 114425)
+++ java/io/File.java (working copy)
@@ -508,9 +508,9 @@
/**
* This method returns a canonical representation of the pathname of
* this file. The actual form of the canonical representation is
- * different. On the GNU system, the canonical form differs from the
- * absolute form in that all relative file references to "." and ".."
- * are resolved and removed.
+ * system-dependent. On the GNU system, conversion to canonical
+ * form involves the removal of redundant separators, references to
+ * "." and "..", and symbolic links.
* <p>
* Note that this method, unlike the other methods which return path
* names, can throw an IOException. This is because native method
next prev parent reply other threads:[~2006-06-06 15:18 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-03 12:32 Patch: rewrite File.toCanonicalPath() for GNU/Posix systems Gary Benson
2006-04-05 22:00 ` Tom Tromey
2006-04-06 10:03 ` Gary Benson
2006-04-07 15:53 ` Patch: rewrite File.toCanonicalPath() take 2 Gary Benson
2006-04-12 10:32 ` Andrew Haley
2006-04-19 13:55 ` Patch: rewrite File.toCanonicalPath() take 3 Gary Benson
2006-05-12 8:16 ` PING: rewrite File.toCanonicalPath() Gary Benson
2006-05-31 8:21 ` Second ping: " Gary Benson
2006-06-01 7:01 ` Bryce McKinlay
2006-06-05 12:07 ` Gary Benson
2006-06-05 15:13 ` Andrew Haley
2006-06-06 15:18 ` Gary Benson [this message]
2006-06-01 7:54 ` Bryce McKinlay
2006-06-01 8:12 ` Gary Benson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060606151701.GA3511@redhat.com \
--to=gbenson@redhat.com \
--cc=java-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).