From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 27375 invoked by alias); 31 May 2006 08:21:00 -0000 Received: (qmail 27334 invoked by uid 22791); 31 May 2006 08:20:52 -0000 X-Spam-Check-By: sourceware.org Received: from gbenson.demon.co.uk (HELO gbenson.demon.co.uk) (80.177.220.214) by sourceware.org (qpsmtpd/0.31) with ESMTP; Wed, 31 May 2006 08:20:31 +0000 Received: from mambo.wire.rat ([192.168.1.6]) by gbenson.demon.co.uk with esmtp (Exim 3.36 #1) id 1FlLwK-0003y9-00 for java-patches@gcc.gnu.org; Wed, 31 May 2006 09:20:28 +0100 Received: from mambo.wire.rat (localhost.localdomain [127.0.0.1]) by mambo.wire.rat (8.13.6/8.13.6) with ESMTP id k4V8KR04024089 for ; Wed, 31 May 2006 09:20:27 +0100 Received: (from gary@localhost) by mambo.wire.rat (8.13.6/8.13.6/Submit) id k4V8KRTZ024088 for java-patches@gcc.gnu.org; Wed, 31 May 2006 09:20:27 +0100 Date: Wed, 31 May 2006 08:21:00 -0000 From: Gary Benson To: java-patches@gcc.gnu.org Subject: Second ping: rewrite File.toCanonicalPath() Message-ID: <20060531082025.GA23651@redhat.com> Mail-Followup-To: java-patches@gcc.gnu.org References: <20060403123246.GA5292@redhat.com> <20060407155319.GC5246@redhat.com> <17468.55086.917234.44472@zapata.pink> <20060419135530.GA12427@redhat.com> <20060512081444.GB5188@redhat.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="W/nzBZO5zC0uMSeA" Content-Disposition: inline In-Reply-To: <20060512081444.GB5188@redhat.com> X-IsSubscribed: yes Mailing-List: contact java-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Subscribe: List-Archive: List-Post: List-Help: , Sender: java-patches-owner@gcc.gnu.org X-SW-Source: 2006-q2/txt/msg00266.txt.bz2 --W/nzBZO5zC0uMSeA Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-length: 519 Hi all, A couple of months ago I posted a patch that rewrote File.toCanonicalPath() on GNU/Posix systems in order for FilePermission checks to work properly, so much of the security framework relies on it. I rewrote it a couple of times at various people's requests, but this (final?) version met with no response whatsoever. Can someone please approve it or tell me what's wrong with it? As before the diff of the method itself is not very legible so I've attached a copy of it as well as the patch. Cheers, Gary --W/nzBZO5zC0uMSeA Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename=patch Content-length: 8850 Index: libjava/java/io/natFilePosix.cc =================================================================== --- libjava/java/io/natFilePosix.cc (revision 112529) +++ libjava/java/io/natFilePosix.cc (working copy) @@ -101,94 +101,144 @@ #endif } +#define CHUNKLOG 8 +#define CHUNKSIZ (1 << CHUNKLOG) + +static int +nextChunkSize (int size) +{ + return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG; +} + +static char * +maybeGrowBuf (char *buf, int *size, int required) +{ + if (required > *size) + { + *size = nextChunkSize (required); + buf = (char *) _Jv_Realloc (buf, *size); + } + return buf; +} + jstring java::io::File::getCanonicalPath (void) { - // We use `+2' here because we might need to use `.' for our special - // case. - char *buf = (char *) __builtin_alloca (JvGetStringUTFLength (path) + 2); - char buf2[MAXPATHLEN]; - jsize total = JvGetStringUTFRegion (path, 0, path->length(), buf); + jstring path = getAbsolutePath (); - // Special case: treat "" the same as ".". - if (total == 0) - buf[total++] = '.'; + int len = JvGetStringUTFLength (path); + int srcl = nextChunkSize (len + 1); + char *src = (char *) _Jv_Malloc (srcl); + JvGetStringUTFRegion (path, 0, path->length(), src); + src[len] = '\0'; + int srci = 1; - buf[total] = '\0'; + int dstl = nextChunkSize (2); + char *dst = (char *) _Jv_Malloc (dstl); + dst[0] = '/'; + int dsti = 1; -#ifdef HAVE_REALPATH - if (realpath (buf, buf2) == NULL) + bool fschecks = true; + + while (src[srci] != '\0') { - // If realpath failed, we have to come up with a canonical path - // anyway. We do this with purely textual manipulation. - // FIXME: this isn't perfect. You can construct a case where - // we get a different answer from the JDK: - // mkdir -p /tmp/a/b/c - // ln -s /tmp/a/b /tmp/a/z - // ... getCanonicalPath("/tmp/a/z/c/nosuchfile") - // We will give /tmp/a/z/c/nosuchfile, while the JDK will - // give /tmp/a/b/c/nosuchfile. - int out_idx; - if (buf[0] != '/') + // Skip slashes. + while (src[srci] == '/') + srci++; + int tmpi = srci; + // Find next slash. + while (src[srci] != '/' && src[srci] != '\0') + srci++; + if (srci == tmpi) + // We hit the end. + break; + len = srci - tmpi; + + // Handle "." and "..". + if (len == 1 && src[tmpi] == '.') + continue; + if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.') { - // Not absolute, so start with current directory. - if (getcwd (buf2, sizeof (buf2)) == NULL) - throw new IOException (); - out_idx = strlen (buf2); + while (dsti > 1 && dst[dsti - 1] != '/') + dsti--; + if (dsti != 1) + dsti--; + // Reenable filesystem checking if disabled, as we might + // have reversed over whatever caused the problem before. + // At least one proprietary JVM has inconsistencies because + // it does not do this. + fschecks = true; + continue; } - else + + // Handle real path components. + dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1); + int dsti_save = dsti; + if (dsti > 1) + dst[dsti++] = '/'; + strncpy (&dst[dsti], &src[tmpi], len); + dsti += len; + if (fschecks == false) + continue; + +#if defined (HAVE_LSTAT) && defined (HAVE_READLINK) + struct stat sb; + dst[dsti] = '\0'; + if (::lstat (dst, &sb) == 0) { - buf2[0] = '/'; - out_idx = 1; - } - int in_idx = 0; - while (buf[in_idx] != '\0') - { - // Skip '/'s. - while (buf[in_idx] == '/') - ++in_idx; - int elt_start = in_idx; - // Find next '/' or end of path. - while (buf[in_idx] != '\0' && buf[in_idx] != '/') - ++in_idx; - if (in_idx == elt_start) + if (S_ISLNK (sb.st_mode)) { - // An empty component means we've reached the end. - break; + int tmpl = CHUNKSIZ; + char *tmp = (char *) _Jv_Malloc (tmpl); + + while (1) + { + tmpi = ::readlink (dst, tmp, tmpl); + if (tmpi < 1) + { + _Jv_Free (src); + _Jv_Free (dst); + _Jv_Free (tmp); + throw new IOException ( + JvNewStringLatin1 ("readlink failed")); + } + if (tmpi < tmpl) + break; + tmpl += CHUNKSIZ; + tmp = (char *) _Jv_Realloc (tmp, tmpl); + } + + // Prepend the link's path to src. + tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1); + strcpy(&tmp[tmpi], &src[srci]); + _Jv_Free (src); + src = tmp; + srcl = tmpl; + srci = 0; + + // Either replace or append dst depending on whether the + // link is relative or absolute. + dsti = src[0] == '/' ? 1 : dsti_save; } - int len = in_idx - elt_start; - if (len == 1 && buf[in_idx] == '.') - continue; - if (len == 2 && buf[in_idx] == '.' && buf[in_idx + 1] == '.') - { - // Found ".." component, lop off last part from existing - // buffer. - --out_idx; - while (out_idx > 0 && buf2[out_idx] != '/') - --out_idx; - // Can't go up past "/". - if (out_idx == 0) - ++out_idx; - } - else - { - // Append a real path component to the output. - if (out_idx > 1) - buf2[out_idx++] = '/'; - strncpy (&buf2[out_idx], &buf[elt_start], len); - out_idx += len; - } } - - buf2[out_idx] = '\0'; + else + { + // Something doesn't exist, or we don't have permission to + // read it, or a previous path component is a directory, or + // a symlink is looped. Whatever, we can't check the + // filesystem any more. + fschecks = false; + } +#endif // HAVE_LSTAT && HAVE_READLINK } + dst[dsti] = '\0'; // FIXME: what encoding to assume for file names? This affects many // calls. - return JvNewStringUTF (buf2); -#else - return JvNewStringUTF (buf); -#endif + path = JvNewStringUTF (dst); + _Jv_Free (src); + _Jv_Free (dst); + return path; } jboolean Index: libjava/java/io/File.java =================================================================== --- libjava/java/io/File.java (revision 112529) +++ libjava/java/io/File.java (working copy) @@ -508,9 +508,9 @@ /** * This method returns a canonical representation of the pathname of * this file. The actual form of the canonical representation is - * different. On the GNU system, the canonical form differs from the - * absolute form in that all relative file references to "." and ".." - * are resolved and removed. + * system-dependent. On the GNU system, conversion to canonical + * form involves the removal of redundant separators, references to + * "." and "..", and symbolic links. *

* Note that this method, unlike the other methods which return path * names, can throw an IOException. This is because native method Index: libjava/configure.ac =================================================================== --- libjava/configure.ac (revision 112529) +++ libjava/configure.ac (working copy) @@ -895,7 +895,7 @@ else AC_CHECK_FUNCS([strerror ioctl select fstat open fsync sleep opendir \ gmtime_r localtime_r readdir_r getpwuid_r getcwd \ - access stat mkdir rename rmdir unlink realpath utime chmod \ + access stat lstat mkdir rename rmdir unlink utime chmod readlink \ nl_langinfo setlocale \ inet_pton uname inet_ntoa \ fork execvp pipe sigaction ftruncate]) Index: libjava/configure =================================================================== --- libjava/configure (revision 112529) +++ libjava/configure (working copy) @@ -9388,9 +9388,10 @@ + for ac_func in strerror ioctl select fstat open fsync sleep opendir \ gmtime_r localtime_r readdir_r getpwuid_r getcwd \ - access stat mkdir rename rmdir unlink realpath utime chmod \ + access stat lstat mkdir rename rmdir unlink utime chmod readlink \ nl_langinfo setlocale \ inet_pton uname inet_ntoa \ fork execvp pipe sigaction ftruncate Index: libjava/include/config.h.in =================================================================== --- libjava/include/config.h.in (revision 112529) +++ libjava/include/config.h.in (working copy) @@ -172,6 +172,9 @@ /* Define to 1 if you have the `localtime_r' function. */ #undef HAVE_LOCALTIME_R +/* Define to 1 if you have the `lstat' function. */ +#undef HAVE_LSTAT + /* Define to 1 if you have the `memcpy' function. */ #undef HAVE_MEMCPY @@ -229,8 +232,8 @@ /* Define to 1 if you have the `readdir_r' function. */ #undef HAVE_READDIR_R -/* Define to 1 if you have the `realpath' function. */ -#undef HAVE_REALPATH +/* Define to 1 if you have the `readlink' function. */ +#undef HAVE_READLINK /* Define to 1 if you have the `rename' function. */ #undef HAVE_RENAME --W/nzBZO5zC0uMSeA Content-Type: text/plain; charset=us-ascii Content-Disposition: attachment; filename="canon.cc" Content-length: 3334 #define CHUNKLOG 8 #define CHUNKSIZ (1 << CHUNKLOG) static int nextChunkSize (int size) { return ((size >> CHUNKLOG) + ((size & (CHUNKSIZ - 1)) ? 1 : 0)) << CHUNKLOG; } static char * maybeGrowBuf (char *buf, int *size, int required) { if (required > *size) { *size = nextChunkSize (required); buf = (char *) _Jv_Realloc (buf, *size); } return buf; } jstring java::io::File::getCanonicalPath (void) { jstring path = getAbsolutePath (); int len = JvGetStringUTFLength (path); int srcl = nextChunkSize (len + 1); char *src = (char *) _Jv_Malloc (srcl); JvGetStringUTFRegion (path, 0, path->length(), src); src[len] = '\0'; int srci = 1; int dstl = nextChunkSize (2); char *dst = (char *) _Jv_Malloc (dstl); dst[0] = '/'; int dsti = 1; bool fschecks = true; while (src[srci] != '\0') { // Skip slashes. while (src[srci] == '/') srci++; int tmpi = srci; // Find next slash. while (src[srci] != '/' && src[srci] != '\0') srci++; if (srci == tmpi) // We hit the end. break; len = srci - tmpi; // Handle "." and "..". if (len == 1 && src[tmpi] == '.') continue; if (len == 2 && src[tmpi] == '.' && src[tmpi + 1] == '.') { while (dsti > 1 && dst[dsti - 1] != '/') dsti--; if (dsti != 1) dsti--; // Reenable filesystem checking if disabled, as we might // have reversed over whatever caused the problem before. // At least one proprietary JVM has inconsistencies because // it does not do this. fschecks = true; continue; } // Handle real path components. dst = maybeGrowBuf (dst, &dstl, dsti + (dsti > 1 ? 1 : 0) + len + 1); int dsti_save = dsti; if (dsti > 1) dst[dsti++] = '/'; strncpy (&dst[dsti], &src[tmpi], len); dsti += len; if (fschecks == false) continue; #if defined (HAVE_LSTAT) && defined (HAVE_READLINK) struct stat sb; dst[dsti] = '\0'; if (::lstat (dst, &sb) == 0) { if (S_ISLNK (sb.st_mode)) { int tmpl = CHUNKSIZ; char *tmp = (char *) _Jv_Malloc (tmpl); while (1) { tmpi = ::readlink (dst, tmp, tmpl); if (tmpi < 1) { _Jv_Free (src); _Jv_Free (dst); _Jv_Free (tmp); throw new IOException ( JvNewStringLatin1 ("readlink failed")); } if (tmpi < tmpl) break; tmpl += CHUNKSIZ; tmp = (char *) _Jv_Realloc (tmp, tmpl); } // Prepend the link's path to src. tmp = maybeGrowBuf (tmp, &tmpl, tmpi + strlen (&src[srci]) + 1); strcpy(&tmp[tmpi], &src[srci]); _Jv_Free (src); src = tmp; srcl = tmpl; srci = 0; // Either replace or append dst depending on whether the // link is relative or absolute. dsti = src[0] == '/' ? 1 : dsti_save; } } else { // Something doesn't exist, or we don't have permission to // read it, or a previous path component is a directory, or // a symlink is looped. Whatever, we can't check the // filesystem any more. fschecks = false; } #endif // HAVE_LSTAT && HAVE_READLINK } dst[dsti] = '\0'; // FIXME: what encoding to assume for file names? This affects many // calls. path = JvNewStringUTF (dst); _Jv_Free (src); _Jv_Free (dst); return path; } --W/nzBZO5zC0uMSeA--