* [PATCH] Fix fnmatch handling of collating elements (BZ #17396)
@ 2014-09-17 10:19 Andreas Schwab
2014-09-17 15:39 ` Joseph S. Myers
0 siblings, 1 reply; 3+ messages in thread
From: Andreas Schwab @ 2014-09-17 10:19 UTC (permalink / raw)
To: libc-alpha
This is basically the same bug as BZ #11561, only in fnmatch, with a
couple of additional bugs mixed in.
It raises a few points:
- There is no efficient way to look up a collating element; like regcomp,
fnmatch needs to loop through all the collating symbol names to find the
associated byte sequences and compare them with the given collating
element.
- This also removes the last use of elem_hash in locale/elem-hash.h (apart
from localedef, which generates the hash table). As far as I can see, the
collating symbol names serve no purpose outside of the locale source files.
Andreas.
[BZ #17396]
* posix/fnmatch_loop.c (FCT): When looking up collating elements
match against (wide) character sequence instead of name. Correct
alignment adjustment.
* posix/fnmatch.c: Don't include "../locale/elem-hash.h".
* posix/Makefile (tests): Add tst-fnmatch4.
* posix/tst-fnmatch4.c: New file.
* Makefile (LOCALES): Add es_US.UTF-8 and es_US.ISO-8859-1.
---
localedata/Makefile | 2 +-
posix/Makefile | 2 +-
posix/fnmatch.c | 1 -
posix/fnmatch_loop.c | 227 ++++++++++++++++++++-------------------------------
posix/tst-fnmatch4.c | 39 +++++++++
5 files changed, 128 insertions(+), 143 deletions(-)
create mode 100644 posix/tst-fnmatch4.c
diff --git a/localedata/Makefile b/localedata/Makefile
index 6424f66..acaa233 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
- tr_TR.ISO-8859-9 en_GB.UTF-8
+ tr_TR.ISO-8859-9 en_GB.UTF-8 es_US.UTF-8 es_US.ISO-8859-1
LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
CHARMAPS := $(shell echo "$(LOCALES)" | \
sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/posix/Makefile b/posix/Makefile
index e6b69b4..174a10a 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -87,7 +87,7 @@ tests := tstgetopt testfnm runtests runptests \
bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
- tst-fnmatch3 bug-regex36
+ tst-fnmatch3 bug-regex36 tst-fnmatch4
xtests := bug-ga2
ifeq (yes,$(build-shared))
test-srcs := globtest
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 85a6ec2..316c6d9 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@
we support a correct implementation only in glibc. */
#ifdef _LIBC
# include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index db6d9d7..fd9cc9f 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -498,26 +498,12 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
{
int32_t table_size;
const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
- char str[c1];
- unsigned int strcnt;
-# else
-# define str (startp + 1)
-# endif
const unsigned char *extra;
int32_t idx;
int32_t elem;
- int32_t second;
- int32_t hash;
-
# if WIDE_CHAR_VERSION
- /* We have to convert the name to a single-byte
- string. This is possible since the names
- consist of ASCII characters and the internal
- representation is UCS4. */
- for (strcnt = 0; strcnt < c1; ++strcnt)
- str[strcnt] = startp[1 + strcnt];
-#endif
+ int32_t *wextra;
+# endif
table_size =
_NL_CURRENT_WORD (LC_COLLATE,
@@ -529,71 +515,55 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
- /* Locate the character in the hashing table. */
- hash = elem_hash (str, c1);
-
- idx = 0;
- elem = hash % table_size;
- if (symb_table[2 * elem] != 0)
- {
- second = hash % (table_size - 2) + 1;
-
- do
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- && (c1
- == extra[symb_table[2 * elem + 1]])
- && memcmp (str,
- &extra[symb_table[2 * elem
- + 1]
- + 1], c1) == 0)
- {
- /* Yep, this is the entry. */
- idx = symb_table[2 * elem + 1];
- idx += 1 + extra[idx];
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- while (symb_table[2 * elem] != 0);
- }
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element. */
+ idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+ /* Skip the byte sequence of the
+ collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+
+ wextra = (int32_t *) &extra[idx + 4];
+
+ if (/* Compare the length of the sequence. */
+ c1 == wextra[0]
+ /* Compare the wide char sequence. */
+ && memcmp (startp + 1, &wextra[1],
+ c1 * sizeof (UCHAR)) == 0)
+ /* Yep, this is the entry. */
+ break;
+# else
+ if (/* Compare the length of the sequence. */
+ c1 == extra[idx]
+ /* Compare the byte sequence. */
+ && memcmp (startp + 1,
+ &extra[idx + 1], c1) == 0)
+ /* Yep, this is the entry. */
+ break;
+# endif
+ }
- if (symb_table[2 * elem] != 0)
+ if (elem < table_size)
{
/* Compare the byte sequence but only if
this is not part of a range. */
-# if WIDE_CHAR_VERSION
- int32_t *wextra;
+ if (! is_range
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
-
- wextra = (int32_t *) &extra[idx + 4];
-# endif
-
- if (! is_range)
- {
# if WIDE_CHAR_VERSION
- for (c1 = 0;
- (int32_t) c1 < wextra[idx];
- ++c1)
- if (n[c1] != wextra[1 + c1])
- break;
-
- if ((int32_t) c1 == wextra[idx])
- goto matched;
+ && memcmp (n, &wextra[1],
+ c1 * sizeof (UCHAR)) == 0
# else
- for (c1 = 0; c1 < extra[idx]; ++c1)
- if (n[c1] != extra[1 + c1])
- break;
-
- if (c1 == extra[idx])
- goto matched;
+ && memcmp (n, &extra[idx + 1], c1) == 0
# endif
+ )
+ {
+ n += c1 - 1;
+ goto matched;
}
/* Get the collation sequence value. */
@@ -601,9 +571,9 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
# if WIDE_CHAR_VERSION
cold = wextra[1 + wextra[idx]];
# else
- /* Adjust for the alignment. */
idx += 1 + extra[idx];
- idx = (idx + 3) & ~4;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
cold = *((int32_t *) &extra[idx]);
# endif
@@ -613,10 +583,10 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
{
/* No valid character. Match it as a
single byte. */
- if (!is_range && *n == str[0])
+ if (!is_range && *n == startp[1])
goto matched;
- cold = str[0];
+ cold = startp[1];
c = *p++;
}
else
@@ -624,7 +594,6 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
}
}
else
-# undef str
#endif
{
c = FOLD (c);
@@ -716,25 +685,11 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
{
int32_t table_size;
const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
- char str[c1];
- unsigned int strcnt;
-# else
-# define str (startp + 1)
-# endif
const unsigned char *extra;
int32_t idx;
int32_t elem;
- int32_t second;
- int32_t hash;
-
# if WIDE_CHAR_VERSION
- /* We have to convert the name to a single-byte
- string. This is possible since the names
- consist of ASCII characters and the internal
- representation is UCS4. */
- for (strcnt = 0; strcnt < c1; ++strcnt)
- str[strcnt] = startp[1 + strcnt];
+ int32_t *wextra;
# endif
table_size =
@@ -747,51 +702,44 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
- /* Locate the character in the hashing
- table. */
- hash = elem_hash (str, c1);
-
- idx = 0;
- elem = hash % table_size;
- if (symb_table[2 * elem] != 0)
- {
- second = hash % (table_size - 2) + 1;
-
- do
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- && (c1
- == extra[symb_table[2 * elem + 1]])
- && memcmp (str,
- &extra[symb_table[2 * elem + 1]
- + 1], c1) == 0)
- {
- /* Yep, this is the entry. */
- idx = symb_table[2 * elem + 1];
- idx += 1 + extra[idx];
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- while (symb_table[2 * elem] != 0);
- }
-
- if (symb_table[2 * elem] != 0)
- {
- /* Compare the byte sequence but only if
- this is not part of a range. */
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating
+ element. */
+ idx += 1 + extra[idx];
# if WIDE_CHAR_VERSION
- int32_t *wextra;
-
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~4;
-
- wextra = (int32_t *) &extra[idx + 4];
+ /* Skip the byte sequence of the
+ collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+
+ wextra = (int32_t *) &extra[idx + 4];
+
+ if (/* Compare the length of the
+ sequence. */
+ c1 == wextra[0]
+ /* Compare the wide char sequence. */
+ && memcmp (startp + 1, &wextra[1],
+ c1 * sizeof (int32_t)) == 0)
+ /* Yep, this is the entry. */
+ break;
+# else
+ if (/* Compare the length of the
+ sequence. */
+ c1 == extra[idx]
+ /* Compare the byte sequence. */
+ && memcmp (startp + 1,
+ &extra[idx + 1], c1) == 0)
+ /* Yep, this is the entry. */
+ break;
# endif
+ }
+
+ if (elem < table_size)
+ {
/* Get the collation sequence value. */
is_seqval = 1;
# if WIDE_CHAR_VERSION
@@ -799,19 +747,18 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
# else
/* Adjust for the alignment. */
idx += 1 + extra[idx];
- idx = (idx + 3) & ~4;
+ idx = (idx + 3) & ~3;
cend = *((int32_t *) &extra[idx]);
# endif
}
- else if (symb_table[2 * elem] != 0 && c1 == 1)
+ else if (c1 == 1)
{
- cend = str[0];
+ cend = startp[1];
c = *p++;
}
else
return FNM_NOMATCH;
}
-# undef str
}
else
{
diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c
new file mode 100644
index 0000000..5ad284a
--- /dev/null
+++ b/posix/tst-fnmatch4.c
@@ -0,0 +1,39 @@
+/* Test for fnmatch handling of collating elements
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <locale.h>
+#include <fnmatch.h>
+
+int
+do_test (void)
+{
+ const char *pattern = "[[.ll.]]";
+
+ setlocale (LC_ALL, "es_US.ISO-8859-1");
+ if (fnmatch (pattern, "ll", 0) != 0)
+ return 1;
+
+ setlocale (LC_ALL, "es_US.UTF-8");
+ if (fnmatch (pattern, "ll", 0) != 0)
+ return 1;
+
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
--
2.1.0
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Fix fnmatch handling of collating elements (BZ #17396)
2014-09-17 10:19 [PATCH] Fix fnmatch handling of collating elements (BZ #17396) Andreas Schwab
@ 2014-09-17 15:39 ` Joseph S. Myers
2014-09-18 7:31 ` Andreas Schwab
0 siblings, 1 reply; 3+ messages in thread
From: Joseph S. Myers @ 2014-09-17 15:39 UTC (permalink / raw)
To: Andreas Schwab; +Cc: libc-alpha
On Wed, 17 Sep 2014, Andreas Schwab wrote:
> - char str[c1];
> - char str[c1];
Does this fix the unbounded VLA allocation (bug 16976)? If so, then apart
from mentioning [BZ #16976] I think a testcase for that bug should be
added as well.
--
Joseph S. Myers
joseph@codesourcery.com
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] Fix fnmatch handling of collating elements (BZ #17396)
2014-09-17 15:39 ` Joseph S. Myers
@ 2014-09-18 7:31 ` Andreas Schwab
0 siblings, 0 replies; 3+ messages in thread
From: Andreas Schwab @ 2014-09-18 7:31 UTC (permalink / raw)
To: Joseph S. Myers; +Cc: libc-alpha
"Joseph S. Myers" <joseph@codesourcery.com> writes:
> On Wed, 17 Sep 2014, Andreas Schwab wrote:
>
>> - char str[c1];
>
>> - char str[c1];
>
> Does this fix the unbounded VLA allocation (bug 16976)? If so, then apart
> from mentioning [BZ #16976] I think a testcase for that bug should be
> added as well.
Thanks, here's an updated patch.
Andreas.
[BZ #16976]
[BZ #17396]
* posix/fnmatch_loop.c (internal_fnmatch, internal_fnwmatch): When
looking up collating elements match against (wide) character
sequence instead of name. Correct alignment adjustment.
* posix/fnmatch.c: Don't include "../locale/elem-hash.h".
* posix/Makefile (tests): Add tst-fnmatch4 and tst-fnmatch5.
* posix/tst-fnmatch4.c: New file.
* posix/tst-fnmatch5.c: New file.
* Makefile (LOCALES): Add es_US.UTF-8 and es_US.ISO-8859-1.
---
localedata/Makefile | 2 +-
posix/Makefile | 2 +-
posix/fnmatch.c | 1 -
posix/fnmatch_loop.c | 227 ++++++++++++++++++++-------------------------------
posix/tst-fnmatch4.c | 51 ++++++++++++
posix/tst-fnmatch5.c | 53 ++++++++++++
6 files changed, 193 insertions(+), 143 deletions(-)
create mode 100644 posix/tst-fnmatch4.c
create mode 100644 posix/tst-fnmatch5.c
diff --git a/localedata/Makefile b/localedata/Makefile
index 6424f66..acaa233 100644
--- a/localedata/Makefile
+++ b/localedata/Makefile
@@ -106,7 +106,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 tr_TR.UTF-8 cs_CZ.UTF-8 \
zh_TW.EUC-TW fa_IR.UTF-8 fr_FR.UTF-8 ja_JP.UTF-8 si_LK.UTF-8 \
- tr_TR.ISO-8859-9 en_GB.UTF-8
+ tr_TR.ISO-8859-9 en_GB.UTF-8 es_US.UTF-8 es_US.ISO-8859-1
LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
CHARMAPS := $(shell echo "$(LOCALES)" | \
sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)
diff --git a/posix/Makefile b/posix/Makefile
index e6b69b4..bea01cb 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -87,7 +87,7 @@ tests := tstgetopt testfnm runtests runptests \
bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
bug-getopt5 tst-getopt_long1 bug-regex34 bug-regex35 \
tst-pathconf tst-getaddrinfo4 tst-rxspencer-no-utf8 \
- tst-fnmatch3 bug-regex36
+ tst-fnmatch3 bug-regex36 tst-fnmatch4 tst-fnmatch5
xtests := bug-ga2
ifeq (yes,$(build-shared))
test-srcs := globtest
diff --git a/posix/fnmatch.c b/posix/fnmatch.c
index 85a6ec2..316c6d9 100644
--- a/posix/fnmatch.c
+++ b/posix/fnmatch.c
@@ -53,7 +53,6 @@
we support a correct implementation only in glibc. */
#ifdef _LIBC
# include "../locale/localeinfo.h"
-# include "../locale/elem-hash.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>
diff --git a/posix/fnmatch_loop.c b/posix/fnmatch_loop.c
index db6d9d7..fd9cc9f 100644
--- a/posix/fnmatch_loop.c
+++ b/posix/fnmatch_loop.c
@@ -498,26 +498,12 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
{
int32_t table_size;
const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
- char str[c1];
- unsigned int strcnt;
-# else
-# define str (startp + 1)
-# endif
const unsigned char *extra;
int32_t idx;
int32_t elem;
- int32_t second;
- int32_t hash;
-
# if WIDE_CHAR_VERSION
- /* We have to convert the name to a single-byte
- string. This is possible since the names
- consist of ASCII characters and the internal
- representation is UCS4. */
- for (strcnt = 0; strcnt < c1; ++strcnt)
- str[strcnt] = startp[1 + strcnt];
-#endif
+ int32_t *wextra;
+# endif
table_size =
_NL_CURRENT_WORD (LC_COLLATE,
@@ -529,71 +515,55 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
- /* Locate the character in the hashing table. */
- hash = elem_hash (str, c1);
-
- idx = 0;
- elem = hash % table_size;
- if (symb_table[2 * elem] != 0)
- {
- second = hash % (table_size - 2) + 1;
-
- do
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- && (c1
- == extra[symb_table[2 * elem + 1]])
- && memcmp (str,
- &extra[symb_table[2 * elem
- + 1]
- + 1], c1) == 0)
- {
- /* Yep, this is the entry. */
- idx = symb_table[2 * elem + 1];
- idx += 1 + extra[idx];
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- while (symb_table[2 * elem] != 0);
- }
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating element. */
+ idx += 1 + extra[idx];
+# if WIDE_CHAR_VERSION
+ /* Skip the byte sequence of the
+ collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+
+ wextra = (int32_t *) &extra[idx + 4];
+
+ if (/* Compare the length of the sequence. */
+ c1 == wextra[0]
+ /* Compare the wide char sequence. */
+ && memcmp (startp + 1, &wextra[1],
+ c1 * sizeof (UCHAR)) == 0)
+ /* Yep, this is the entry. */
+ break;
+# else
+ if (/* Compare the length of the sequence. */
+ c1 == extra[idx]
+ /* Compare the byte sequence. */
+ && memcmp (startp + 1,
+ &extra[idx + 1], c1) == 0)
+ /* Yep, this is the entry. */
+ break;
+# endif
+ }
- if (symb_table[2 * elem] != 0)
+ if (elem < table_size)
{
/* Compare the byte sequence but only if
this is not part of a range. */
-# if WIDE_CHAR_VERSION
- int32_t *wextra;
+ if (! is_range
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~3;
-
- wextra = (int32_t *) &extra[idx + 4];
-# endif
-
- if (! is_range)
- {
# if WIDE_CHAR_VERSION
- for (c1 = 0;
- (int32_t) c1 < wextra[idx];
- ++c1)
- if (n[c1] != wextra[1 + c1])
- break;
-
- if ((int32_t) c1 == wextra[idx])
- goto matched;
+ && memcmp (n, &wextra[1],
+ c1 * sizeof (UCHAR)) == 0
# else
- for (c1 = 0; c1 < extra[idx]; ++c1)
- if (n[c1] != extra[1 + c1])
- break;
-
- if (c1 == extra[idx])
- goto matched;
+ && memcmp (n, &extra[idx + 1], c1) == 0
# endif
+ )
+ {
+ n += c1 - 1;
+ goto matched;
}
/* Get the collation sequence value. */
@@ -601,9 +571,9 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
# if WIDE_CHAR_VERSION
cold = wextra[1 + wextra[idx]];
# else
- /* Adjust for the alignment. */
idx += 1 + extra[idx];
- idx = (idx + 3) & ~4;
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
cold = *((int32_t *) &extra[idx]);
# endif
@@ -613,10 +583,10 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
{
/* No valid character. Match it as a
single byte. */
- if (!is_range && *n == str[0])
+ if (!is_range && *n == startp[1])
goto matched;
- cold = str[0];
+ cold = startp[1];
c = *p++;
}
else
@@ -624,7 +594,6 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
}
}
else
-# undef str
#endif
{
c = FOLD (c);
@@ -716,25 +685,11 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
{
int32_t table_size;
const int32_t *symb_table;
-# if WIDE_CHAR_VERSION
- char str[c1];
- unsigned int strcnt;
-# else
-# define str (startp + 1)
-# endif
const unsigned char *extra;
int32_t idx;
int32_t elem;
- int32_t second;
- int32_t hash;
-
# if WIDE_CHAR_VERSION
- /* We have to convert the name to a single-byte
- string. This is possible since the names
- consist of ASCII characters and the internal
- representation is UCS4. */
- for (strcnt = 0; strcnt < c1; ++strcnt)
- str[strcnt] = startp[1 + strcnt];
+ int32_t *wextra;
# endif
table_size =
@@ -747,51 +702,44 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
_NL_CURRENT (LC_COLLATE,
_NL_COLLATE_SYMB_EXTRAMB);
- /* Locate the character in the hashing
- table. */
- hash = elem_hash (str, c1);
-
- idx = 0;
- elem = hash % table_size;
- if (symb_table[2 * elem] != 0)
- {
- second = hash % (table_size - 2) + 1;
-
- do
- {
- /* First compare the hashing value. */
- if (symb_table[2 * elem] == hash
- && (c1
- == extra[symb_table[2 * elem + 1]])
- && memcmp (str,
- &extra[symb_table[2 * elem + 1]
- + 1], c1) == 0)
- {
- /* Yep, this is the entry. */
- idx = symb_table[2 * elem + 1];
- idx += 1 + extra[idx];
- break;
- }
-
- /* Next entry. */
- elem += second;
- }
- while (symb_table[2 * elem] != 0);
- }
-
- if (symb_table[2 * elem] != 0)
- {
- /* Compare the byte sequence but only if
- this is not part of a range. */
+ for (elem = 0; elem < table_size; elem++)
+ if (symb_table[2 * elem] != 0)
+ {
+ idx = symb_table[2 * elem + 1];
+ /* Skip the name of collating
+ element. */
+ idx += 1 + extra[idx];
# if WIDE_CHAR_VERSION
- int32_t *wextra;
-
- idx += 1 + extra[idx];
- /* Adjust for the alignment. */
- idx = (idx + 3) & ~4;
-
- wextra = (int32_t *) &extra[idx + 4];
+ /* Skip the byte sequence of the
+ collating element. */
+ idx += 1 + extra[idx];
+ /* Adjust for the alignment. */
+ idx = (idx + 3) & ~3;
+
+ wextra = (int32_t *) &extra[idx + 4];
+
+ if (/* Compare the length of the
+ sequence. */
+ c1 == wextra[0]
+ /* Compare the wide char sequence. */
+ && memcmp (startp + 1, &wextra[1],
+ c1 * sizeof (int32_t)) == 0)
+ /* Yep, this is the entry. */
+ break;
+# else
+ if (/* Compare the length of the
+ sequence. */
+ c1 == extra[idx]
+ /* Compare the byte sequence. */
+ && memcmp (startp + 1,
+ &extra[idx + 1], c1) == 0)
+ /* Yep, this is the entry. */
+ break;
# endif
+ }
+
+ if (elem < table_size)
+ {
/* Get the collation sequence value. */
is_seqval = 1;
# if WIDE_CHAR_VERSION
@@ -799,19 +747,18 @@ FCT (pattern, string, string_end, no_leading_period, flags, ends, alloca_used)
# else
/* Adjust for the alignment. */
idx += 1 + extra[idx];
- idx = (idx + 3) & ~4;
+ idx = (idx + 3) & ~3;
cend = *((int32_t *) &extra[idx]);
# endif
}
- else if (symb_table[2 * elem] != 0 && c1 == 1)
+ else if (c1 == 1)
{
- cend = str[0];
+ cend = startp[1];
c = *p++;
}
else
return FNM_NOMATCH;
}
-# undef str
}
else
{
diff --git a/posix/tst-fnmatch4.c b/posix/tst-fnmatch4.c
new file mode 100644
index 0000000..14829bd
--- /dev/null
+++ b/posix/tst-fnmatch4.c
@@ -0,0 +1,51 @@
+/* Test for fnmatch handling of collating elements
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <stdio.h>
+#include <locale.h>
+#include <fnmatch.h>
+
+static int
+do_test_locale (const char *locale)
+{
+ const char *pattern = "[[.ll.]]";
+
+ if (setlocale (LC_ALL, locale) == NULL)
+ {
+ printf ("could not set locale %s\n", locale);
+ return 1;
+ }
+
+ if (fnmatch (pattern, "ll", 0) != 0)
+ {
+ printf ("%s didn't match in locale %s\n", pattern, locale);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+do_test (void)
+{
+ return (do_test_locale ("es_US.ISO-8859-1")
+ || do_test_locale ("es_US.UTF-8"));
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/tst-fnmatch5.c b/posix/tst-fnmatch5.c
new file mode 100644
index 0000000..d2b8d16
--- /dev/null
+++ b/posix/tst-fnmatch5.c
@@ -0,0 +1,53 @@
+/* Test for fnmatch handling of overlong collating symbols (BZ #16976)
+ Copyright (C) 2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fnmatch.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+#define LENGTH 20000000
+
+char pattern[LENGTH + 7];
+
+static int
+do_test (void)
+{
+ if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+ {
+ puts ("could not set locale");
+ return 1;
+ }
+ pattern[0] = '[';
+ pattern[1] = '[';
+ pattern[2] = '.';
+ memset (pattern + 3, 'a', LENGTH);
+ pattern[LENGTH + 3] = '.';
+ pattern[LENGTH + 4] = ']';
+ pattern[LENGTH + 5] = ']';
+ int ret = fnmatch (pattern, "a", 0);
+ if (ret == 0)
+ {
+ puts ("fnmatch returned 0 for invalid pattern");
+ return 1;
+ }
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
--
2.1.0
--
Andreas Schwab, SUSE Labs, schwab@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE 1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-09-18 7:31 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-17 10:19 [PATCH] Fix fnmatch handling of collating elements (BZ #17396) Andreas Schwab
2014-09-17 15:39 ` Joseph S. Myers
2014-09-18 7:31 ` Andreas Schwab
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).