[PATCH] Fix fnmatch handling of collating elements (BZ #17396)

public inbox for libc-alpha@sourceware.org
 help / color / mirror / Atom feed

* [PATCH] Fix fnmatch handling of collating elements (BZ #17396)
@ 2014-09-17 10:19 Andreas Schwab
  2014-09-17 15:39 ` Joseph S. Myers
  0 siblings, 1 reply; 3+ messages in thread
From: Andreas Schwab @ 2014-09-17 10:19 UTC (permalink / raw)
  To: libc-alpha

This is basically the same as BZ #11561, only for fnmatch, and mixing in
a couple of additional bugs.

It raises a few points:

- There is no efficient way to look up a collating element, like regcomp
  it needs to loop through all the collating symbol names to find the
  associated byte sequences and compare it with the given collating
  element.
- This also removes the last use of elem_hash in locale/elem-hash.h (apart
  from localedef which generates the hash table).  AFAICS the collating
  symbol names serve no purpose outside of the locale source files.

Andreas.

	[BZ #17396]
	* posix/fnmatch_loop.c (FCT): When looking up collating elements
	match against (wide) character sequence instead of name.  Correct
	alignment adjustment.
	* posix/fnmatch.c: Don't include "../locale/elem-hash.h".
	* posix/Makefile (tests): Add tst-fnmatch4.
	* posix/tst-fnmatch4.c: New file.

	* Makefile (LOCALES): Add es_US.UTF-8 and es_US.ISO-8859-1.
---
 localedata/Makefile  |   2 +-
 posix/Makefile       |   2 +-
 posix/fnmatch.c      |   1 -
 posix/fnmatch_loop.c | 227 ++++++++++++++++++++-------------------------------
 posix/tst-fnmatch4.c |  39 +++++++++
 5 files changed, 128 insertions(+), 143 deletions(-)
 create mode 100644 posix/tst-fnmatch4.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 6424f66..acaa233 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
 	   hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
 	   nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
 	   zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
-	   tr_TR.ISO-8859-9 en_GB.UTF-8
+	   tr_TR.ISO-8859-9 en_GB.UTF-8 es_US.UTF-8 es_US.ISO-8859-1
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
 CHARMAPS := $(shell echo "$(LOCALES)" | \
 		    sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/posix/Makefile b/posix/Makefile
index e6b69b4..174a10a 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -87,7 +87,7 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
 		   bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
 		   tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
-		   tst-fnmatch3 bug-regex36
+		   tst-fnmatch3 bug-regex36 tst-fnmatch4
 xtests		:= bug-ga2
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 85a6ec2..316c6d9 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@
    we support a correct implementation only in glibc.  */
 #ifdef _LIBC
 # include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
 # include "../locale/coll-lookup.h"
 # include <shlib-compat.h>
 
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index db6d9d7..fd9cc9f 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -498,26 +498,12 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			  {
 			    int32_t table_size;
 			    const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-			    char str[c1];
-			    unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
 			    const unsigned char *extra;
 			    int32_t idx;
 			    int32_t elem;
-			    int32_t second;
-			    int32_t hash;
-
 # if WIDE_CHAR_VERSION
-			    /* We have to convert the name to a single-byte
-			       string.  This is possible since the names
-			       consist of ASCII characters and the internal
-			       representation is UCS4.  */
-			    for (strcnt = 0; strcnt < c1; ++strcnt)
-			      str[strcnt] = startp[1 + strcnt];
-#endif
+			    int32_t *wextra;
+# endif
 
 			    table_size =
 			      _NL_CURRENT_WORD (LC_COLLATE,
@@ -529,71 +515,55 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			      _NL_CURRENT (LC_COLLATE,
 					   _NL_COLLATE_SYMB_EXTRAMB);
 
-			    /* Locate the character in the hashing table.  */
-			    hash = elem_hash (str, c1);
-
-			    idx = 0;
-			    elem = hash % table_size;
-			    if (symb_table[2 * elem] != 0)
-			      {
-				second = hash % (table_size - 2) + 1;
-
-				do
-				  {
-				    /* First compare the hashing value.  */
-				    if (symb_table[2 * elem] == hash
-					&& (c1
-					    == extra[symb_table[2 * elem + 1]])
-					&& memcmp (str,
-						   &extra[symb_table[2 * elem
-								     + 1]
-							  + 1], c1) == 0)
-				      {
-					/* Yep, this is the entry.  */
-					idx = symb_table[2 * elem + 1];
-					idx += 1 + extra[idx];
-					break;
-				      }
-
-				    /* Next entry.  */
-				    elem += second;
-				  }
-				while (symb_table[2 * elem] != 0);
-			      }
+			    for (elem = 0; elem < table_size; elem++)
+			      if (symb_table[2 * elem] != 0)
+				{
+				  idx = symb_table[2 * elem + 1];
+				  /* Skip the name of collating element.  */
+				  idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+				  /* Skip the byte sequence of the
+				     collating element.  */
+				  idx += 1 + extra[idx];
+				  /* Adjust for the alignment.  */
+				  idx = (idx + 3) & ~3;
+
+				  wextra = (int32_t *) &extra[idx + 4];
+
+				  if (/* Compare the length of the sequence.  */
+				      c1 == wextra[0]
+				      /* Compare the wide char sequence.  */
+				      && memcmp (startp + 1, &wextra[1],
+						 c1 * sizeof (UCHAR)) == 0)
+				    /* Yep, this is the entry.  */
+				    break;
+# else
+				  if (/* Compare the length of the sequence.  */
+				      c1 == extra[idx]
+				      /* Compare the byte sequence.  */
+				      && memcmp (startp + 1,
+						 &extra[idx + 1], c1) == 0)
+				    /* Yep, this is the entry.  */
+				    break;
+# endif
+				}
 
-			    if (symb_table[2 * elem] != 0)
+			    if (elem < table_size)
 			      {
 				/* Compare the byte sequence but only if
 				   this is not part of a range.  */
-# if WIDE_CHAR_VERSION
-				int32_t *wextra;
+				if (! is_range
 
-				idx += 1 + extra[idx];
-				/* Adjust for the alignment.  */
-				idx = (idx + 3) & ~3;
-
-				wextra = (int32_t *) &extra[idx + 4];
-# endif
-
-				if (! is_range)
-				  {
 # if WIDE_CHAR_VERSION
-				    for (c1 = 0;
-					 (int32_t) c1 < wextra[idx];
-					 ++c1)
-				      if (n[c1] != wextra[1 + c1])
-					break;
-
-				    if ((int32_t) c1 == wextra[idx])
-				      goto matched;
+				    && memcmp (n, &wextra[1],
+					       c1 * sizeof (UCHAR)) == 0
 # else
-				    for (c1 = 0; c1 < extra[idx]; ++c1)
-				      if (n[c1] != extra[1 + c1])
-					break;
-
-				    if (c1 == extra[idx])
-				      goto matched;
+				    && memcmp (n, &extra[idx + 1], c1) == 0
 # endif
+				    )
+				  {
+				    n += c1 - 1;
+				    goto matched;
 				  }
 
 				/* Get the collation sequence value.  */
@@ -601,9 +571,9 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 # if WIDE_CHAR_VERSION
 				cold = wextra[1 + wextra[idx]];
 # else
-				/* Adjust for the alignment.  */
 				idx += 1 + extra[idx];
-				idx = (idx + 3) & ~4;
+				/* Adjust for the alignment.  */
+				idx = (idx + 3) & ~3;
 				cold = *((int32_t *) &extra[idx]);
 # endif
 
@@ -613,10 +583,10 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			      {
 				/* No valid character.  Match it as a
 				   single byte.  */
-				if (!is_range && *n == str[0])
+				if (!is_range && *n == startp[1])
 				  goto matched;
 
-				cold = str[0];
+				cold = startp[1];
 				c = *p++;
 			      }
 			    else
@@ -624,7 +594,6 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			  }
 		      }
 		    else
-# undef str
 #endif
 		      {
 			c = FOLD (c);
@@ -716,25 +685,11 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			      {
 				int32_t table_size;
 				const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-				char str[c1];
-				unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
 				const unsigned char *extra;
 				int32_t idx;
 				int32_t elem;
-				int32_t second;
-				int32_t hash;
-
 # if WIDE_CHAR_VERSION
-				/* We have to convert the name to a single-byte
-				   string.  This is possible since the names
-				   consist of ASCII characters and the internal
-				   representation is UCS4.  */
-				for (strcnt = 0; strcnt < c1; ++strcnt)
-				  str[strcnt] = startp[1 + strcnt];
+				int32_t *wextra;
 # endif
 
 				table_size =
@@ -747,51 +702,44 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 				  _NL_CURRENT (LC_COLLATE,
 					       _NL_COLLATE_SYMB_EXTRAMB);
 
-				/* Locate the character in the hashing
-				   table.  */
-				hash = elem_hash (str, c1);
-
-				idx = 0;
-				elem = hash % table_size;
-				if (symb_table[2 * elem] != 0)
-				  {
-				    second = hash % (table_size - 2) + 1;
-
-				    do
-				      {
-					/* First compare the hashing value.  */
-					if (symb_table[2 * elem] == hash
-					    && (c1
-						== extra[symb_table[2 * elem + 1]])
-					    && memcmp (str,
-						       &extra[symb_table[2 * elem + 1]
-							      + 1], c1) == 0)
-					  {
-					    /* Yep, this is the entry.  */
-					    idx = symb_table[2 * elem + 1];
-					    idx += 1 + extra[idx];
-					    break;
-					  }
-
-					/* Next entry.  */
-					elem += second;
-				      }
-				    while (symb_table[2 * elem] != 0);
-				  }
-
-				if (symb_table[2 * elem] != 0)
-				  {
-				    /* Compare the byte sequence but only if
-				       this is not part of a range.  */
+				for (elem = 0; elem < table_size; elem++)
+				  if (symb_table[2 * elem] != 0)
+				    {
+				      idx = symb_table[2 * elem + 1];
+				      /* Skip the name of collating
+					 element.  */
+				      idx += 1 + extra[idx];
 # if WIDE_CHAR_VERSION
-				    int32_t *wextra;
-
-				    idx += 1 + extra[idx];
-				    /* Adjust for the alignment.  */
-				    idx = (idx + 3) & ~4;
-
-				    wextra = (int32_t *) &extra[idx + 4];
+				      /* Skip the byte sequence of the
+					 collating element.  */
+				      idx += 1 + extra[idx];
+				      /* Adjust for the alignment.  */
+				      idx = (idx + 3) & ~3;
+
+				      wextra = (int32_t *) &extra[idx + 4];
+
+				      if (/* Compare the length of the
+					     sequence.  */
+					  c1 == wextra[0]
+					  /* Compare the wide char sequence.  */
+					  && memcmp (startp + 1, &wextra[1],
+						     c1 * sizeof (int32_t)) == 0)
+					/* Yep, this is the entry.  */
+					break;
+# else
+				      if (/* Compare the length of the
+					     sequence.  */
+					  c1 == extra[idx]
+					  /* Compare the byte sequence.  */
+					  && memcmp (startp + 1,
+						     &extra[idx + 1], c1) == 0)
+					/* Yep, this is the entry.  */
+					break;
 # endif
+				    }
+
+				if (elem < table_size)
+				  {
 				    /* Get the collation sequence value.  */
 				    is_seqval = 1;
 # if WIDE_CHAR_VERSION
@@ -799,19 +747,18 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 # else
 				    /* Adjust for the alignment.  */
 				    idx += 1 + extra[idx];
-				    idx = (idx + 3) & ~4;
+				    idx = (idx + 3) & ~3;
 				    cend = *((int32_t *) &extra[idx]);
 # endif
 				  }
-				else if (symb_table[2 * elem] != 0 && c1 == 1)
+				else if (c1 == 1)
 				  {
-				    cend = str[0];
+				    cend = startp[1];
 				    c = *p++;
 				  }
 				else
 				  return FNM_NOMATCH;
 			      }
-# undef str
 			  }
 			else
 			  {
diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c
new file mode 100644
index 0000000..5ad284a
--- /dev/null
+++ b/posix/tst-fnmatch4.c
@@ -0,0 +1,39 @@
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <locale.h>
+#include <fnmatch.h>
+
+int
+do_test (void)
+{
+  const char *pattern = "[[.ll.]]";
+
+  setlocale (LC_ALL, "es_US.ISO-8859-1");
+  if (fnmatch (pattern, "ll", 0) != 0)
+    return 1;
+
+  setlocale (LC_ALL, "es_US.UTF-8");
+  if (fnmatch (pattern, "ll", 0) != 0)
+    return 1;
+
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
-- 
2.1.0

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix fnmatch handling of collating elements (BZ #17396)
  2014-09-17 10:19 [PATCH] Fix fnmatch handling of collating elements (BZ #17396) Andreas Schwab
@ 2014-09-17 15:39 ` Joseph S. Myers
  2014-09-18  7:31   ` Andreas Schwab
  0 siblings, 1 reply; 3+ messages in thread
From: Joseph S. Myers @ 2014-09-17 15:39 UTC (permalink / raw)
  To: Andreas Schwab; +Cc: libc-alpha

On Wed, 17 Sep 2014, Andreas Schwab wrote:

> -			    char str[c1];

> -				char str[c1];

Does this fix the unbounded VLA allocation (bug 16976)?  If so, then apart 
from mentioning [BZ #16976] I think a testcase for that bug should be 
added as well.

-- 
Joseph S. Myers
joseph@codesourcery.com

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Fix fnmatch handling of collating elements (BZ #17396)
  2014-09-17 15:39 ` Joseph S. Myers
@ 2014-09-18  7:31   ` Andreas Schwab
  0 siblings, 0 replies; 3+ messages in thread
From: Andreas Schwab @ 2014-09-18  7:31 UTC (permalink / raw)
  To: Joseph S. Myers; +Cc: libc-alpha

"Joseph S. Myers" <joseph@codesourcery.com> writes:

> On Wed, 17 Sep 2014, Andreas Schwab wrote:
>
>> -			    char str[c1];
>
>> -				char str[c1];
>
> Does this fix the unbounded VLA allocation (bug 16976)?  If so, then apart 
> from mentioning [BZ #16976] I think a testcase for that bug should be 
> added as well.

Thanks, here's an updated patch.

Andreas.

	[BZ #16976]
	[BZ #17396]
	* posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When
	looking up collating elements match against (wide) character
	sequence instead of name.  Correct alignment adjustment.
	* posix/fnmatch.c: Don't include "../locale/elem-hash.h".
	* posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5.
	* posix/tst-fnmatch4.c: New file.
	* posix/tst-fnmatch5.c: New file.

	* Makefile (LOCALES): Add es_US.UTF-8 and es_US.ISO-8859-1.
---
 localedata/Makefile  |   2 +-
 posix/Makefile       |   2 +-
 posix/fnmatch.c      |   1 -
 posix/fnmatch_loop.c | 227 ++++++++++++++++++++-------------------------------
 posix/tst-fnmatch4.c |  51 ++++++++++++
 posix/tst-fnmatch5.c |  53 ++++++++++++
 6 files changed, 193 insertions(+), 143 deletions(-)
 create mode 100644 posix/tst-fnmatch4.c
 create mode 100644 posix/tst-fnmatch5.c

diff --git a/localedata/Makefile b/localedata/Makefile
index 6424f66..acaa233 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
 	   hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
 	   nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
 	   zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
-	   tr_TR.ISO-8859-9 en_GB.UTF-8
+	   tr_TR.ISO-8859-9 en_GB.UTF-8 es_US.UTF-8 es_US.ISO-8859-1
 LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
 CHARMAPS := $(shell echo "$(LOCALES)" | \
 		    sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/posix/Makefile b/posix/Makefile
index e6b69b4..bea01cb 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -87,7 +87,7 @@ tests		:= tstgetopt testfnm runtests runptests	     \
 		   bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
 		   bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
 		   tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
-		   tst-fnmatch3 bug-regex36
+		   tst-fnmatch3 bug-regex36 tst-fnmatch4 tst-fnmatch5
 xtests		:= bug-ga2
 ifeq (yes,$(build-shared))
 test-srcs	:= globtest
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 85a6ec2..316c6d9 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@
    we support a correct implementation only in glibc.  */
 #ifdef _LIBC
 # include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
 # include "../locale/coll-lookup.h"
 # include <shlib-compat.h>
 
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index db6d9d7..fd9cc9f 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -498,26 +498,12 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			  {
 			    int32_t table_size;
 			    const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-			    char str[c1];
-			    unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
 			    const unsigned char *extra;
 			    int32_t idx;
 			    int32_t elem;
-			    int32_t second;
-			    int32_t hash;
-
 # if WIDE_CHAR_VERSION
-			    /* We have to convert the name to a single-byte
-			       string.  This is possible since the names
-			       consist of ASCII characters and the internal
-			       representation is UCS4.  */
-			    for (strcnt = 0; strcnt < c1; ++strcnt)
-			      str[strcnt] = startp[1 + strcnt];
-#endif
+			    int32_t *wextra;
+# endif
 
 			    table_size =
 			      _NL_CURRENT_WORD (LC_COLLATE,
@@ -529,71 +515,55 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			      _NL_CURRENT (LC_COLLATE,
 					   _NL_COLLATE_SYMB_EXTRAMB);
 
-			    /* Locate the character in the hashing table.  */
-			    hash = elem_hash (str, c1);
-
-			    idx = 0;
-			    elem = hash % table_size;
-			    if (symb_table[2 * elem] != 0)
-			      {
-				second = hash % (table_size - 2) + 1;
-
-				do
-				  {
-				    /* First compare the hashing value.  */
-				    if (symb_table[2 * elem] == hash
-					&& (c1
-					    == extra[symb_table[2 * elem + 1]])
-					&& memcmp (str,
-						   &extra[symb_table[2 * elem
-								     + 1]
-							  + 1], c1) == 0)
-				      {
-					/* Yep, this is the entry.  */
-					idx = symb_table[2 * elem + 1];
-					idx += 1 + extra[idx];
-					break;
-				      }
-
-				    /* Next entry.  */
-				    elem += second;
-				  }
-				while (symb_table[2 * elem] != 0);
-			      }
+			    for (elem = 0; elem < table_size; elem++)
+			      if (symb_table[2 * elem] != 0)
+				{
+				  idx = symb_table[2 * elem + 1];
+				  /* Skip the name of collating element.  */
+				  idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+				  /* Skip the byte sequence of the
+				     collating element.  */
+				  idx += 1 + extra[idx];
+				  /* Adjust for the alignment.  */
+				  idx = (idx + 3) & ~3;
+
+				  wextra = (int32_t *) &extra[idx + 4];
+
+				  if (/* Compare the length of the sequence.  */
+				      c1 == wextra[0]
+				      /* Compare the wide char sequence.  */
+				      && memcmp (startp + 1, &wextra[1],
+						 c1 * sizeof (UCHAR)) == 0)
+				    /* Yep, this is the entry.  */
+				    break;
+# else
+				  if (/* Compare the length of the sequence.  */
+				      c1 == extra[idx]
+				      /* Compare the byte sequence.  */
+				      && memcmp (startp + 1,
+						 &extra[idx + 1], c1) == 0)
+				    /* Yep, this is the entry.  */
+				    break;
+# endif
+				}
 
-			    if (symb_table[2 * elem] != 0)
+			    if (elem < table_size)
 			      {
 				/* Compare the byte sequence but only if
 				   this is not part of a range.  */
-# if WIDE_CHAR_VERSION
-				int32_t *wextra;
+				if (! is_range
 
-				idx += 1 + extra[idx];
-				/* Adjust for the alignment.  */
-				idx = (idx + 3) & ~3;
-
-				wextra = (int32_t *) &extra[idx + 4];
-# endif
-
-				if (! is_range)
-				  {
 # if WIDE_CHAR_VERSION
-				    for (c1 = 0;
-					 (int32_t) c1 < wextra[idx];
-					 ++c1)
-				      if (n[c1] != wextra[1 + c1])
-					break;
-
-				    if ((int32_t) c1 == wextra[idx])
-				      goto matched;
+				    && memcmp (n, &wextra[1],
+					       c1 * sizeof (UCHAR)) == 0
 # else
-				    for (c1 = 0; c1 < extra[idx]; ++c1)
-				      if (n[c1] != extra[1 + c1])
-					break;
-
-				    if (c1 == extra[idx])
-				      goto matched;
+				    && memcmp (n, &extra[idx + 1], c1) == 0
 # endif
+				    )
+				  {
+				    n += c1 - 1;
+				    goto matched;
 				  }
 
 				/* Get the collation sequence value.  */
@@ -601,9 +571,9 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 # if WIDE_CHAR_VERSION
 				cold = wextra[1 + wextra[idx]];
 # else
-				/* Adjust for the alignment.  */
 				idx += 1 + extra[idx];
-				idx = (idx + 3) & ~4;
+				/* Adjust for the alignment.  */
+				idx = (idx + 3) & ~3;
 				cold = *((int32_t *) &extra[idx]);
 # endif
 
@@ -613,10 +583,10 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			      {
 				/* No valid character.  Match it as a
 				   single byte.  */
-				if (!is_range && *n == str[0])
+				if (!is_range && *n == startp[1])
 				  goto matched;
 
-				cold = str[0];
+				cold = startp[1];
 				c = *p++;
 			      }
 			    else
@@ -624,7 +594,6 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			  }
 		      }
 		    else
-# undef str
 #endif
 		      {
 			c = FOLD (c);
@@ -716,25 +685,11 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 			      {
 				int32_t table_size;
 				const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
-				char str[c1];
-				unsigned int strcnt;
-# else
-#  define str (startp + 1)
-# endif
 				const unsigned char *extra;
 				int32_t idx;
 				int32_t elem;
-				int32_t second;
-				int32_t hash;
-
 # if WIDE_CHAR_VERSION
-				/* We have to convert the name to a single-byte
-				   string.  This is possible since the names
-				   consist of ASCII characters and the internal
-				   representation is UCS4.  */
-				for (strcnt = 0; strcnt < c1; ++strcnt)
-				  str[strcnt] = startp[1 + strcnt];
+				int32_t *wextra;
 # endif
 
 				table_size =
@@ -747,51 +702,44 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 				  _NL_CURRENT (LC_COLLATE,
 					       _NL_COLLATE_SYMB_EXTRAMB);
 
-				/* Locate the character in the hashing
-				   table.  */
-				hash = elem_hash (str, c1);
-
-				idx = 0;
-				elem = hash % table_size;
-				if (symb_table[2 * elem] != 0)
-				  {
-				    second = hash % (table_size - 2) + 1;
-
-				    do
-				      {
-					/* First compare the hashing value.  */
-					if (symb_table[2 * elem] == hash
-					    && (c1
-						== extra[symb_table[2 * elem + 1]])
-					    && memcmp (str,
-						       &extra[symb_table[2 * elem + 1]
-							      + 1], c1) == 0)
-					  {
-					    /* Yep, this is the entry.  */
-					    idx = symb_table[2 * elem + 1];
-					    idx += 1 + extra[idx];
-					    break;
-					  }
-
-					/* Next entry.  */
-					elem += second;
-				      }
-				    while (symb_table[2 * elem] != 0);
-				  }
-
-				if (symb_table[2 * elem] != 0)
-				  {
-				    /* Compare the byte sequence but only if
-				       this is not part of a range.  */
+				for (elem = 0; elem < table_size; elem++)
+				  if (symb_table[2 * elem] != 0)
+				    {
+				      idx = symb_table[2 * elem + 1];
+				      /* Skip the name of collating
+					 element.  */
+				      idx += 1 + extra[idx];
 # if WIDE_CHAR_VERSION
-				    int32_t *wextra;
-
-				    idx += 1 + extra[idx];
-				    /* Adjust for the alignment.  */
-				    idx = (idx + 3) & ~4;
-
-				    wextra = (int32_t *) &extra[idx + 4];
+				      /* Skip the byte sequence of the
+					 collating element.  */
+				      idx += 1 + extra[idx];
+				      /* Adjust for the alignment.  */
+				      idx = (idx + 3) & ~3;
+
+				      wextra = (int32_t *) &extra[idx + 4];
+
+				      if (/* Compare the length of the
+					     sequence.  */
+					  c1 == wextra[0]
+					  /* Compare the wide char sequence.  */
+					  && memcmp (startp + 1, &wextra[1],
+						     c1 * sizeof (int32_t)) == 0)
+					/* Yep, this is the entry.  */
+					break;
+# else
+				      if (/* Compare the length of the
+					     sequence.  */
+					  c1 == extra[idx]
+					  /* Compare the byte sequence.  */
+					  && memcmp (startp + 1,
+						     &extra[idx + 1], c1) == 0)
+					/* Yep, this is the entry.  */
+					break;
 # endif
+				    }
+
+				if (elem < table_size)
+				  {
 				    /* Get the collation sequence value.  */
 				    is_seqval = 1;
 # if WIDE_CHAR_VERSION
@@ -799,19 +747,18 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
 # else
 				    /* Adjust for the alignment.  */
 				    idx += 1 + extra[idx];
-				    idx = (idx + 3) & ~4;
+				    idx = (idx + 3) & ~3;
 				    cend = *((int32_t *) &extra[idx]);
 # endif
 				  }
-				else if (symb_table[2 * elem] != 0 && c1 == 1)
+				else if (c1 == 1)
 				  {
-				    cend = str[0];
+				    cend = startp[1];
 				    c = *p++;
 				  }
 				else
 				  return FNM_NOMATCH;
 			      }
-# undef str
 			  }
 			else
 			  {
diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c
new file mode 100644
index 0000000..14829bd
--- /dev/null
+++ b/posix/tst-fnmatch4.c
@@ -0,0 +1,51 @@
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <stdio.h>
+#include <locale.h>
+#include <fnmatch.h>
+
+static int
+do_test_locale (const char *locale)
+{
+  const char *pattern = "[[.ll.]]";
+
+  if (setlocale (LC_ALL, locale) == NULL)
+    {
+      printf ("could not set locale %s\n", locale);
+      return 1;
+    }
+
+  if (fnmatch (pattern, "ll", 0) != 0)
+    {
+      printf ("%s didn't match in locale %s\n", pattern, locale);
+      return 1;
+    }
+
+  return 0;
+}
+
+static int
+do_test (void)
+{
+  return (do_test_locale ("es_US.ISO-8859-1")
+	  || do_test_locale ("es_US.UTF-8"));
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c
new file mode 100644
index 0000000..d2b8d16
--- /dev/null
+++ b/posix/tst-fnmatch5.c
@@ -0,0 +1,53 @@
+/* Test for fnmatch handling of collating elements
+   Copyright (C) 2014 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <fnmatch.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+#define LENGTH 20000000
+
+char pattern[LENGTH + 7];
+
+static int
+do_test (void)
+{
+  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+    {
+      puts ("could not set locale");
+      return 1;
+    }
+  pattern[0] = '[';
+  pattern[1] = '[';
+  pattern[2] = '.';
+  memset (pattern + 3, 'a', LENGTH);
+  pattern[LENGTH + 3] = '.';
+  pattern[LENGTH + 4] = ']';
+  pattern[LENGTH + 5] = ']';
+  int ret = fnmatch (pattern, "a", 0);
+  if (ret == 0)
+    {
+      puts ("fnmatch returned 0 for invalid pattern");
+      return 1;
+    }
+  return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
-- 
2.1.0

-- 
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2014-09-18  7:31 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-17 10:19 [PATCH] Fix fnmatch handling of collating elements (BZ #17396) Andreas Schwab
2014-09-17 15:39 ` Joseph S. Myers
2014-09-18  7:31   ` Andreas Schwab

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).