public inbox for libc-hacker@sourceware.org
 help / color / mirror / Atom feed
* [PATCH] Fix regex mb char handling
@ 2005-03-17 21:48 Jakub Jelinek
  2005-03-19  0:25 ` Ulrich Drepper
  0 siblings, 1 reply; 2+ messages in thread
From: Jakub Jelinek @ 2005-03-17 21:48 UTC (permalink / raw)
  To: Ulrich Drepper, Roland McGrath; +Cc: Glibc hackers

Hi!

The following patch fixes segfault on:
#include <locale.h>
#include <regex.h>
#include <stdio.h>

int main (void)
{
  setlocale (LC_ALL, "zh_CN");

  regex_t re;
  if (regcomp (&re, "abc", REG_ICASE))
    {
      puts ("regcomp failed");
      return 1;
    }
  if (regexec (&re, "\xa8\xa7", 0, NULL, 0) != REG_NOMATCH)
    {
      puts ("regexec unexpectedly succeeded");
      return 1;
    }
  regfree (&re);
  return 0;
}
testcase (not included yet in the patch, because I want to find out if
some already built testing locale has similar property, particularly
that for a mb char its towupper (resp. towlower) char is not
represent in the locale's character set).

2005-03-17  Jakub Jelinek  <jakub@redhat.com>

	* posix/regcomp.c (re_compile_fastmap_iter): Fix check for failed
	__wcrtomb.  Check return values of other __wcrtomb calls.
	* posix/regex_internal.c (build_wcs_buffer, re_string_skip_chars):
	Change mbclen type to size_t.
	(build_wcs_upper_buffer): Change mbclen and mbcdlen type to size_t.
	Handle mb chars whose upper case doesn't have multibyte representation
	in locale's charset.

--- libc/posix/regcomp.c.jj	2005-02-22 10:02:42.000000000 +0100
+++ libc/posix/regcomp.c	2005-03-17 21:06:43.622233856 +0100
@@ -359,7 +359,8 @@ re_compile_fastmap_iter (bufp, init_stat
 	      memset (&state, 0, sizeof (state));
 	      if (mbrtowc (&wc, (const char *) buf, p - buf,
 			   &state) == p - buf
-		  && __wcrtomb ((char *) buf, towlower (wc), &state) > 0)
+		  && (__wcrtomb ((char *) buf, towlower (wc), &state)
+		      != (size_t) -1))
 		re_set_fastmap (fastmap, 0, buf[0]);
 	    }
 #endif
@@ -409,12 +410,13 @@ re_compile_fastmap_iter (bufp, init_stat
 	      char buf[256];
 	      mbstate_t state;
 	      memset (&state, '\0', sizeof (state));
-	      __wcrtomb (buf, cset->mbchars[i], &state);
-	      re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
+	      if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
+		re_set_fastmap (fastmap, icase, *(unsigned char *) buf);
 	      if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
 		{
-		  __wcrtomb (buf, towlower (cset->mbchars[i]), &state);
-		  re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
+		  if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
+		      != (size_t) -1)
+		    re_set_fastmap (fastmap, 0, *(unsigned char *) buf);
 		}
 	    }
 	}
--- libc/posix/regex_internal.c.jj	2005-03-08 13:05:18.000000000 +0100
+++ libc/posix/regex_internal.c	2005-03-17 21:17:48.069329822 +0100
@@ -220,7 +220,8 @@ build_wcs_buffer (pstr)
   unsigned char buf[64];
 #endif
   mbstate_t prev_st;
-  int byte_idx, end_idx, mbclen, remain_len;
+  int byte_idx, end_idx, remain_len;
+  size_t mbclen;
 
   /* Build the buffers from pstr->valid_len to either pstr->len or
      pstr->bufs_len.  */
@@ -281,7 +282,8 @@ build_wcs_upper_buffer (pstr)
      re_string_t *pstr;
 {
   mbstate_t prev_st;
-  int src_idx, byte_idx, end_idx, mbclen, remain_len;
+  int src_idx, byte_idx, end_idx, remain_len;
+  size_t mbclen;
 #ifdef _LIBC
   char buf[MB_CUR_MAX];
   assert (MB_CUR_MAX >= pstr->mb_cur_max);
@@ -318,12 +320,12 @@ build_wcs_upper_buffer (pstr)
 	  mbclen = mbrtowc (&wc,
 			    ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
 			     + byte_idx), remain_len, &pstr->cur_state);
-	  if (BE (mbclen > 0, 1))
+	  if (BE (mbclen + 2 > 2, 1))
 	    {
 	      wchar_t wcu = wc;
 	      if (iswlower (wc))
 		{
-		  int mbcdlen;
+		  size_t mbcdlen;
 
 		  wcu = towupper (wc);
 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
@@ -386,20 +388,20 @@ build_wcs_upper_buffer (pstr)
 	else
 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
 	mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
-	if (BE (mbclen > 0, 1))
+	if (BE (mbclen + 2 > 2, 1))
 	  {
 	    wchar_t wcu = wc;
 	    if (iswlower (wc))
 	      {
-		int mbcdlen;
+		size_t mbcdlen;
 
 		wcu = towupper (wc);
 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
 		if (BE (mbclen == mbcdlen, 1))
 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
-		else
+		else if (mbcdlen != (size_t) -1)
 		  {
-		    int i;
+		    size_t i;
 
 		    if (byte_idx + mbcdlen > pstr->bufs_len)
 		      {
@@ -416,7 +418,7 @@ build_wcs_upper_buffer (pstr)
 		      }
 		    if (!pstr->offsets_needed)
 		      {
-			for (i = 0; i < byte_idx; ++i)
+			for (i = 0; i < (size_t) byte_idx; ++i)
 			  pstr->offsets[i] = i;
 			pstr->offsets_needed = 1;
 		      }
@@ -439,13 +441,15 @@ build_wcs_upper_buffer (pstr)
 		    src_idx += mbclen;
 		    continue;
 		  }
+                else
+                  memcpy (pstr->mbs + byte_idx, p, mbclen);
 	      }
 	    else
 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
 
 	    if (BE (pstr->offsets_needed != 0, 0))
 	      {
-		int i;
+		size_t i;
 		for (i = 0; i < mbclen; ++i)
 		  pstr->offsets[byte_idx + i] = src_idx + i;
 	      }
@@ -496,7 +500,8 @@ re_string_skip_chars (pstr, new_raw_idx,
      wint_t *last_wc;
 {
   mbstate_t prev_st;
-  int rawbuf_idx, mbclen;
+  int rawbuf_idx;
+  size_t mbclen;
   wchar_t wc = 0;
 
   /* Skip the characters which are not necessary to check.  */

	Jakub

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH] Fix regex mb char handling
  2005-03-17 21:48 [PATCH] Fix regex mb char handling Jakub Jelinek
@ 2005-03-19  0:25 ` Ulrich Drepper
  0 siblings, 0 replies; 2+ messages in thread
From: Ulrich Drepper @ 2005-03-19  0:25 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Glibc hackers

[-- Attachment #1: Type: text/plain, Size: 109 bytes --]

Applied.

--
➧ Ulrich Drepper ➧ Red Hat, Inc. ➧ 444 Castro St ➧ Mountain View, CA ❖

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 251 bytes --]

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2005-03-19  0:25 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-03-17 21:48 [PATCH] Fix regex mb char handling Jakub Jelinek
2005-03-19  0:25 ` Ulrich Drepper

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).