From: Jakub Jelinek <jakub@redhat.com>
To: Ulrich Drepper <drepper@redhat.com>, Roland McGrath <roland@redhat.com>
Cc: Glibc hackers <libc-hacker@sources.redhat.com>
Subject: [PATCH] Fix regex mb char handling
Date: Thu, 17 Mar 2005 21:48:00 -0000 [thread overview]
Message-ID: <20050317214810.GG4961@sunsite.mff.cuni.cz> (raw)
Hi!
The following patch fixes a segfault on:
#include <locale.h>
#include <regex.h>
#include <stdio.h>
/* Reproducer: compile a case-insensitive pattern under the zh_CN locale and
   run it against a two-byte input.  Returns 0 when regexec reports
   REG_NOMATCH, and 1 on any setup failure or an unexpected match.  */
int main (void)
{
  regex_t re;
  int result = 0;

  /* If the locale cannot be selected, setlocale returns NULL and the
     program keeps running in the startup "C" locale.  The reproducer would
     then pass without testing the intended locale, so report it.  */
  if (setlocale (LC_ALL, "zh_CN") == NULL)
    {
      puts ("setlocale failed");
      return 1;
    }

  if (regcomp (&re, "abc", REG_ICASE))
    {
      puts ("regcomp failed");
      return 1;
    }

  if (regexec (&re, "\xa8\xa7", 0, NULL, 0) != REG_NOMATCH)
    {
      puts ("regexec unexpectedly succeeded");
      result = 1;
    }

  /* Release the compiled pattern on both the success and failure paths.  */
  regfree (&re);
  return result;
}
testcase (not included yet in the patch, because I want to find out if
some already built testing locale has a similar property, particularly
that for a mb char its towupper (resp. towlower) char is not
representable in the locale's character set).
2005-03-17 Jakub Jelinek <jakub@redhat.com>
* posix/regcomp.c (re_compile_fastmap_iter): Fix check for failed
__wcrtomb. Check return values of other __wcrtomb calls.
* posix/regex_internal.c (build_wcs_buffer, re_string_skip_chars):
Change mbclen type to size_t.
(build_wcs_upper_buffer): Change mbclen and mbcdlen type to size_t.
Handle mb chars whose upper case doesn't have multibyte representation
in locale's charset.
--- libc/posix/regcomp.c.jj 2005-02-22 10:02:42.000000000 +0100
+++ libc/posix/regcomp.c 2005-03-17 21:06:43.622233856 +0100
@@ -359,7 +359,8 @@ re_compile_fastmap_iter (bufp, init_stat
memset (&state, 0, sizeof (state));
if (mbrtowc (&wc, (const char *) buf, p - buf,
&state) == p - buf
- && __wcrtomb ((char *) buf, towlower (wc), &state) > 0)
+ && (__wcrtomb ((char *) buf, towlower (wc), &state)
+ != (size_t) -1))
re_set_fastmap (fastmap, 0, buf[0]);
}
#endif
@@ -409,12 +410,13 @@ re_compile_fastmap_iter (bufp, init_stat
char buf[256];
mbstate_t state;
memset (&state, '\0', sizeof (state));
- __wcrtomb (buf, cset->mbchars[i], &state);
- re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+ if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+ re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
{
- __wcrtomb (buf, towlower (cset->mbchars[i]), &state);
- re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+ if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+ != (size_t) -1)
+ re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
}
}
}
--- libc/posix/regex_internal.c.jj 2005-03-08 13:05:18.000000000 +0100
+++ libc/posix/regex_internal.c 2005-03-17 21:17:48.069329822 +0100
@@ -220,7 +220,8 @@ build_wcs_buffer (pstr)
unsigned char buf[64];
#endif
mbstate_t prev_st;
- int byte_idx, end_idx, mbclen, remain_len;
+ int byte_idx, end_idx, remain_len;
+ size_t mbclen;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
@@ -281,7 +282,8 @@ build_wcs_upper_buffer (pstr)
re_string_t *pstr;
{
mbstate_t prev_st;
- int src_idx, byte_idx, end_idx, mbclen, remain_len;
+ int src_idx, byte_idx, end_idx, remain_len;
+ size_t mbclen;
#ifdef _LIBC
char buf[MB_CUR_MAX];
assert (MB_CUR_MAX >= pstr->mb_cur_max);
@@ -318,12 +320,12 @@ build_wcs_upper_buffer (pstr)
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
- if (BE (mbclen > 0, 1))
+ if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
{
- int mbcdlen;
+ size_t mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
@@ -386,20 +388,20 @@ build_wcs_upper_buffer (pstr)
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
- if (BE (mbclen > 0, 1))
+ if (BE (mbclen + 2 > 2, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
{
- int mbcdlen;
+ size_t mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
- else
+ else if (mbcdlen != (size_t) -1)
{
- int i;
+ size_t i;
if (byte_idx + mbcdlen > pstr->bufs_len)
{
@@ -416,7 +418,7 @@ build_wcs_upper_buffer (pstr)
}
if (!pstr->offsets_needed)
{
- for (i = 0; i < byte_idx; ++i)
+ for (i = 0; i < (size_t) byte_idx; ++i)
pstr->offsets[i] = i;
pstr->offsets_needed = 1;
}
@@ -439,13 +441,15 @@ build_wcs_upper_buffer (pstr)
src_idx += mbclen;
continue;
}
+ else
+ memcpy (pstr->mbs + byte_idx, p, mbclen);
}
else
memcpy (pstr->mbs + byte_idx, p, mbclen);
if (BE (pstr->offsets_needed != 0, 0))
{
- int i;
+ size_t i;
for (i = 0; i < mbclen; ++i)
pstr->offsets[byte_idx + i] = src_idx + i;
}
@@ -496,7 +500,8 @@ re_string_skip_chars (pstr, new_raw_idx,
wint_t *last_wc;
{
mbstate_t prev_st;
- int rawbuf_idx, mbclen;
+ int rawbuf_idx;
+ size_t mbclen;
wchar_t wc = 0;
/* Skip the characters which are not necessary to check. */
Jakub
next reply other threads:[~2005-03-17 21:48 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-03-17 21:48 Jakub Jelinek [this message]
2005-03-19 0:25 ` Ulrich Drepper
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050317214810.GG4961@sunsite.mff.cuni.cz \
--to=jakub@redhat.com \
--cc=drepper@redhat.com \
--cc=libc-hacker@sources.redhat.com \
--cc=roland@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).