From: Gary Benson <gbenson@redhat.com>
To: java-patches@gcc.gnu.org
Subject: Patch: rewrite File.toCanonicalPath() take 3
Date: Wed, 19 Apr 2006 13:55:00 -0000 [thread overview]
Message-ID: <20060419135530.GA12427@redhat.com> (raw)
In-Reply-To: <17468.55086.917234.44472@zapata.pink>
[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]
Andrew Haley wrote:
> Gary Benson writes:
> > Here's a second draft of my rewritten File.toCanonicalPath(). If
> > you missed my previous email, a working File.toCanonicalPath() is
> > required in order for FilePermission checks to work properly.
> > GCJ's present implementation of this has a number of drawbacks,
> > which this patch fixes.
> >
> > This patch improves upon my previous one in two ways:
> >
> > 1. Following comments on the Classpath mailing list, storage for
> > paths is now allocated dynamically.
> > 2. Following comments on this list, the "no-rewind-past-root"
> > check has been removed.
> >
> > As before the diff of the method itself is not very legible so
> > I've attached a copy of it as well as the patch.
>
> The repeated use of realloc() in this method is rather nasty. It
> must surely be possible to grow the buffer in some way that doesn't
> require this. Just pre-allocate the buffer and grow it iff it's
> insufficient. 256 bytes or so will be adequate in most cases.
>
> I know tromey said that realloc() is not slow when compared with
> readlink(), and this is surely true, but I can't accept that we
> should unnecessarily make may calls to realloc() for that reason.
How does this look?
Cheers,
Gary
[-- Attachment #2: patch --]
[-- Type: text/plain, Size: 8850 bytes --]
Index: libjava/java/io/natFilePosix.cc
===================================================================
--- libjava/java/io/natFilePosix.cc (revision 112529)
+++ libjava/java/io/natFilePosix.cc (working copy)
@@ -101,94 +101,144 @@
#endif
}
+#define CHUNKLOG 8
+#define CHUNKSIZ (1 << CHUNKLOG)
+
+static int
+nextChunkSize (int size)
+{
+ return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG;
+}
+
+static char *
+maybeGrowBuf (char *buf, int *size, int required)
+{
+ if (required > *size)
+ {
+ *size = nextChunkSize (required);
+ buf = (char *) _Jv_Realloc (buf, *size);
+ }
+ return buf;
+}
+
jstring
java::io::File::getCanonicalPath (void)
{
- // We use `+2' here because we might need to use `.' for our special
- // case.
- char *buf = (char *) __builtin_alloca (JvGetStringUTFLength (path) + 2);
- char buf2[MAXPATHLEN];
- jsize total = JvGetStringUTFRegion (path, 0, path->length(), buf);
+ jstring path = getAbsolutePath ();
- // Special case: treat "" the same as ".".
- if (total == 0)
- buf[total++] = '.';
+ int len = JvGetStringUTFLength (path);
+ int srcl = nextChunkSize (len + 1);
+ char *src = (char *) _Jv_Malloc (srcl);
+ JvGetStringUTFRegion (path, 0, path->length(), src);
+ src[len] = '\0';
+ int srci = 1;
- buf[total] = '\0';
+ int dstl = nextChunkSize (2);
+ char *dst = (char *) _Jv_Malloc (dstl);
+ dst[0] = '/';
+ int dsti = 1;
-#ifdef HAVE_REALPATH
- if (realpath (buf, buf2) == NULL)
+ bool fschecks = true;
+
+ while (src[srci] != '\0')
{
- // If realpath failed, we have to come up with a canonical path
- // anyway. We do this with purely textual manipulation.
- // FIXME: this isn't perfect. You can construct a case where
- // we get a different answer from the JDK:
- // mkdir -p /tmp/a/b/c
- // ln -s /tmp/a/b /tmp/a/z
- // ... getCanonicalPath("/tmp/a/z/c/nosuchfile")
- // We will give /tmp/a/z/c/nosuchfile, while the JDK will
- // give /tmp/a/b/c/nosuchfile.
- int out_idx;
- if (buf[0] != '/')
+ // Skip slashes.
+ while (src[srci] == '/')
+ srci++;
+ int tmpi = srci;
+ // Find next slash.
+ while (src[srci] != '/' && src[srci] != '\0')
+ srci++;
+ if (srci == tmpi)
+ // We hit the end.
+ break;
+ len = srci - tmpi;
+
+ // Handle "." and "..".
+ if (len == 1 && src[tmpi] == '.')
+ continue;
+ if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
{
- // Not absolute, so start with current directory.
- if (getcwd (buf2, sizeof (buf2)) == NULL)
- throw new IOException ();
- out_idx = strlen (buf2);
+ while (dsti > 1 && dst[dsti - 1] != '/')
+ dsti--;
+ if (dsti != 1)
+ dsti--;
+ // Reenable filesystem checking if disabled, as we might
+ // have reversed over whatever caused the problem before.
+ // At least one proprietary JVM has inconsistencies because
+ // it does not do this.
+ fschecks = true;
+ continue;
}
- else
+
+ // Handle real path components.
+ dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1);
+ int dsti_save = dsti;
+ if (dsti > 1)
+ dst[dsti++] = '/';
+ strncpy (&dst[dsti], &src[tmpi], len);
+ dsti += len;
+ if (fschecks == false)
+ continue;
+
+#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
+ struct stat sb;
+ dst[dsti] = '\0';
+ if (::lstat (dst, &sb) == 0)
{
- buf2[0] = '/';
- out_idx = 1;
- }
- int in_idx = 0;
- while (buf[in_idx] != '\0')
- {
- // Skip '/'s.
- while (buf[in_idx] == '/')
- ++in_idx;
- int elt_start = in_idx;
- // Find next '/' or end of path.
- while (buf[in_idx] != '\0' && buf[in_idx] != '/')
- ++in_idx;
- if (in_idx == elt_start)
+ if (S_ISLNK (sb.st_mode))
{
- // An empty component means we've reached the end.
- break;
+ int tmpl = CHUNKSIZ;
+ char *tmp = (char *) _Jv_Malloc (tmpl);
+
+ while (1)
+ {
+ tmpi = ::readlink (dst, tmp, tmpl);
+ if (tmpi < 1)
+ {
+ _Jv_Free (src);
+ _Jv_Free (dst);
+ _Jv_Free (tmp);
+ throw new IOException (
+ JvNewStringLatin1 ("readlink failed"));
+ }
+ if (tmpi < tmpl)
+ break;
+ tmpl += CHUNKSIZ;
+ tmp = (char *) _Jv_Realloc (tmp, tmpl);
+ }
+
+ // Prepend the link's path to src.
+ tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1);
+ strcpy(&tmp[tmpi], &src[srci]);
+ _Jv_Free (src);
+ src = tmp;
+ srcl = tmpl;
+ srci = 0;
+
+ // Either replace or append dst depending on whether the
+ // link is relative or absolute.
+ dsti = src[0] == '/' ? 1 : dsti_save;
}
- int len = in_idx - elt_start;
- if (len == 1 && buf[in_idx] == '.')
- continue;
- if (len == 2 && buf[in_idx] == '.' && buf[in_idx + 1] == '.')
- {
- // Found ".." component, lop off last part from existing
- // buffer.
- --out_idx;
- while (out_idx > 0 && buf2[out_idx] != '/')
- --out_idx;
- // Can't go up past "/".
- if (out_idx == 0)
- ++out_idx;
- }
- else
- {
- // Append a real path component to the output.
- if (out_idx > 1)
- buf2[out_idx++] = '/';
- strncpy (&buf2[out_idx], &buf[elt_start], len);
- out_idx += len;
- }
}
-
- buf2[out_idx] = '\0';
+ else
+ {
+ // Something doesn't exist, or we don't have permission to
+ // read it, or a previous path component is a directory, or
+ // a symlink is looped. Whatever, we can't check the
+ // filesystem any more.
+ fschecks = false;
+ }
+#endif // HAVE_LSTAT && HAVE_READLINK
}
+ dst[dsti] = '\0';
// FIXME: what encoding to assume for file names? This affects many
// calls.
- return JvNewStringUTF (buf2);
-#else
- return JvNewStringUTF (buf);
-#endif
+ path = JvNewStringUTF (dst);
+ _Jv_Free (src);
+ _Jv_Free (dst);
+ return path;
}
jboolean
Index: libjava/java/io/File.java
===================================================================
--- libjava/java/io/File.java (revision 112529)
+++ libjava/java/io/File.java (working copy)
@@ -508,9 +508,9 @@
/**
* This method returns a canonical representation of the pathname of
* this file. The actual form of the canonical representation is
- * different. On the GNU system, the canonical form differs from the
- * absolute form in that all relative file references to "." and ".."
- * are resolved and removed.
+ * system-dependent. On the GNU system, conversion to canonical
+ * form involves the removal of redundant separators, references to
+ * "." and "..", and symbolic links.
* <p>
* Note that this method, unlike the other methods which return path
* names, can throw an IOException. This is because native method
Index: libjava/configure.ac
===================================================================
--- libjava/configure.ac (revision 112529)
+++ libjava/configure.ac (working copy)
@@ -895,7 +895,7 @@
else
AC_CHECK_FUNCS([strerror ioctl select fstat open fsync sleep opendir \
gmtime_r localtime_r readdir_r getpwuid_r getcwd \
- access stat mkdir rename rmdir unlink realpath utime chmod \
+ access stat lstat mkdir rename rmdir unlink utime chmod readlink \
nl_langinfo setlocale \
inet_pton uname inet_ntoa \
fork execvp pipe sigaction ftruncate])
Index: libjava/configure
===================================================================
--- libjava/configure (revision 112529)
+++ libjava/configure (working copy)
@@ -9388,9 +9388,10 @@
+
for ac_func in strerror ioctl select fstat open fsync sleep opendir \
gmtime_r localtime_r readdir_r getpwuid_r getcwd \
- access stat mkdir rename rmdir unlink realpath utime chmod \
+ access stat lstat mkdir rename rmdir unlink utime chmod readlink \
nl_langinfo setlocale \
inet_pton uname inet_ntoa \
fork execvp pipe sigaction ftruncate
Index: libjava/include/config.h.in
===================================================================
--- libjava/include/config.h.in (revision 112529)
+++ libjava/include/config.h.in (working copy)
@@ -172,6 +172,9 @@
/* Define to 1 if you have the `localtime_r' function. */
#undef HAVE_LOCALTIME_R
+/* Define to 1 if you have the `lstat' function. */
+#undef HAVE_LSTAT
+
/* Define to 1 if you have the `memcpy' function. */
#undef HAVE_MEMCPY
@@ -229,8 +232,8 @@
/* Define to 1 if you have the `readdir_r' function. */
#undef HAVE_READDIR_R
-/* Define to 1 if you have the `realpath' function. */
-#undef HAVE_REALPATH
+/* Define to 1 if you have the `readlink' function. */
+#undef HAVE_READLINK
/* Define to 1 if you have the `rename' function. */
#undef HAVE_RENAME
[-- Attachment #3: canon.cc --]
[-- Type: text/plain, Size: 3334 bytes --]
#define CHUNKLOG 8
#define CHUNKSIZ (1 << CHUNKLOG)
static int
nextChunkSize (int size)
{
return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG;
}
static char *
maybeGrowBuf (char *buf, int *size, int required)
{
if (required > *size)
{
*size = nextChunkSize (required);
buf = (char *) _Jv_Realloc (buf, *size);
}
return buf;
}
jstring
java::io::File::getCanonicalPath (void)
{
jstring path = getAbsolutePath ();
int len = JvGetStringUTFLength (path);
int srcl = nextChunkSize (len + 1);
char *src = (char *) _Jv_Malloc (srcl);
JvGetStringUTFRegion (path, 0, path->length(), src);
src[len] = '\0';
int srci = 1;
int dstl = nextChunkSize (2);
char *dst = (char *) _Jv_Malloc (dstl);
dst[0] = '/';
int dsti = 1;
bool fschecks = true;
while (src[srci] != '\0')
{
// Skip slashes.
while (src[srci] == '/')
srci++;
int tmpi = srci;
// Find next slash.
while (src[srci] != '/' && src[srci] != '\0')
srci++;
if (srci == tmpi)
// We hit the end.
break;
len = srci - tmpi;
// Handle "." and "..".
if (len == 1 && src[tmpi] == '.')
continue;
if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.')
{
while (dsti > 1 && dst[dsti - 1] != '/')
dsti--;
if (dsti != 1)
dsti--;
// Reenable filesystem checking if disabled, as we might
// have reversed over whatever caused the problem before.
// At least one proprietary JVM has inconsistencies because
// it does not do this.
fschecks = true;
continue;
}
// Handle real path components.
dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1);
int dsti_save = dsti;
if (dsti > 1)
dst[dsti++] = '/';
strncpy (&dst[dsti], &src[tmpi], len);
dsti += len;
if (fschecks == false)
continue;
#if defined (HAVE_LSTAT) && defined (HAVE_READLINK)
struct stat sb;
dst[dsti] = '\0';
if (::lstat (dst, &sb) == 0)
{
if (S_ISLNK (sb.st_mode))
{
int tmpl = CHUNKSIZ;
char *tmp = (char *) _Jv_Malloc (tmpl);
while (1)
{
tmpi = ::readlink (dst, tmp, tmpl);
if (tmpi < 1)
{
_Jv_Free (src);
_Jv_Free (dst);
_Jv_Free (tmp);
throw new IOException (
JvNewStringLatin1 ("readlink failed"));
}
if (tmpi < tmpl)
break;
tmpl += CHUNKSIZ;
tmp = (char *) _Jv_Realloc (tmp, tmpl);
}
// Prepend the link's path to src.
tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1);
strcpy(&tmp[tmpi], &src[srci]);
_Jv_Free (src);
src = tmp;
srcl = tmpl;
srci = 0;
// Either replace or append dst depending on whether the
// link is relative or absolute.
dsti = src[0] == '/' ? 1 : dsti_save;
}
}
else
{
// Something doesn't exist, or we don't have permission to
// read it, or a previous path component is a directory, or
// a symlink is looped. Whatever, we can't check the
// filesystem any more.
fschecks = false;
}
#endif // HAVE_LSTAT && HAVE_READLINK
}
dst[dsti] = '\0';
// FIXME: what encoding to assume for file names? This affects many
// calls.
path = JvNewStringUTF (dst);
_Jv_Free (src);
_Jv_Free (dst);
return path;
}
next prev parent reply other threads:[~2006-04-19 13:55 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-04-03 12:32 Patch: rewrite File.toCanonicalPath() for GNU/Posix systems Gary Benson
2006-04-05 22:00 ` Tom Tromey
2006-04-06 10:03 ` Gary Benson
2006-04-07 15:53 ` Patch: rewrite File.toCanonicalPath() take 2 Gary Benson
2006-04-12 10:32 ` Andrew Haley
2006-04-19 13:55 ` Gary Benson [this message]
2006-05-12 8:16 ` PING: rewrite File.toCanonicalPath() Gary Benson
2006-05-31 8:21 ` Second ping: " Gary Benson
2006-06-01 7:01 ` Bryce McKinlay
2006-06-05 12:07 ` Gary Benson
2006-06-05 15:13 ` Andrew Haley
2006-06-06 15:18 ` Patch: FYI: " Gary Benson
2006-06-01 7:54 ` Second ping: " Bryce McKinlay
2006-06-01 8:12 ` Gary Benson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060419135530.GA12427@redhat.com \
--to=gbenson@redhat.com \
--cc=java-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).