From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 100007 invoked by alias); 1 Dec 2017 16:18:41 -0000 Mailing-List: contact newlib-cvs-help@sourceware.org; run by ezmlm Precedence: bulk List-Id: List-Subscribe: List-Archive: List-Post: List-Help: , Sender: newlib-cvs-owner@sourceware.org Received: (qmail 99960 invoked by uid 9078); 1 Dec 2017 16:18:41 -0000 Date: Fri, 01 Dec 2017 16:18:00 -0000 Message-ID: <20171201161841.99957.qmail@sourceware.org> Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Corinna Vinschen To: newlib-cvs@sourceware.org Subject: [newlib-cygwin] newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/master X-Git-Oldrev: 9638c0752798c1c9aaf2e56ebaab240eae5eb8dc X-Git-Newrev: a49209d2bc84644cba75a68b1104d89a810aecb1 X-SW-Source: 2017-q4/txt/msg00073.txt.bz2 https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=a49209d2bc84644cba75a68b1104d89a810aecb1 commit a49209d2bc84644cba75a68b1104d89a810aecb1 Author: Corinna Vinschen Date: Fri Dec 1 17:18:26 2017 +0100 newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t * vfscanf: per POSIX, if the target type is wchar_t, the width is counted in (multibyte) characters, not in bytes. * vfscanf: Handle UTF-8 multibyte sequences converted to surrogate pairs on UTF-16 systems. * vfwscanf: Don't count high surrogates in input against field width counting. 
Per POSIX, input is counted in characters, so a surrogate pair uses up only one unit of the field width. Signed-off-by: Corinna Vinschen Diff: --- newlib/libc/stdio/vfscanf.c | 28 +++++++++++++++++++------ newlib/libc/stdio/vfwscanf.c | 50 +++++++++++++++++++++++++++----------------- 2 files changed, 53 insertions(+), 25 deletions(-) diff --git a/newlib/libc/stdio/vfscanf.c b/newlib/libc/stdio/vfscanf.c index e8e4dab..f90079d 100644 --- a/newlib/libc/stdio/vfscanf.c +++ b/newlib/libc/stdio/vfscanf.c @@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), _p = _p0; \ _w; \ }) + /* For systems with wchar_t == 2 (UTF-16) check if there's room for + at least 2 wchar_t's (surrogate pairs). */ #define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \ ({ \ size_t _nw = (_w); \ - if (_p_p && _p - _p0 == _nw) \ + ptrdiff_t _dif = _p - _p0; \ + if (_p_p && \ + ((sizeof (_type) == 2 && _dif >= _nw - 1) \ + || _dif >= _nw)) \ { \ _p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \ if (!_p0) \ @@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), nassigned = EOF; \ goto match_failure; \ } \ - _p = _p0 + _nw; \ + _p = _p0 + _dif; \ *_p_p = _p0; \ _nw <<= 1; \ } \ @@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), size_t wcp_siz = 0; #endif mbstate_t state; - memset (&state, 0, sizeof (mbstate_t)); if (flags & SUPPRESS) wcp = NULL; #ifdef _WANT_IO_POSIX_EXTENSIONS @@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), else wcp = GET_ARG (N, ap, wchar_t *); n = 0; - while (width-- != 0) + while (width != 0) { if (n == MB_CUR_MAX) goto input_failure; buf[n++] = *fp->_p; fp->_r -= 1; fp->_p += 1; + /* Got a high surrogate, allow low surrogate to slip + through */ + if (mbslen != 3 || state.__count != 4) + memset (&state, 0, sizeof (mbstate_t)); if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state)) == (size_t)-1) goto input_failure; /* Invalid sequence */ @@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), if (mbslen != (size_t)-2) /* Incomplete sequence */ { nread += n; + /* Handle high surrogate */ + if (mbslen 
!= 3 || state.__count != 4) + width -= 1; if (!(flags & SUPPRESS)) { #ifdef _WANT_IO_POSIX_EXTENSIONS @@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), #endif /* Process %S and %ls placeholders */ mbstate_t state; - memset (&state, 0, sizeof (mbstate_t)); if (flags & SUPPRESS) wcp = &wc; #ifdef _WANT_IO_POSIX_EXTENSIONS @@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), buf[n++] = *fp->_p; fp->_r -= 1; fp->_p += 1; - width--; + /* Got a high surrogate, allow low surrogate to slip + through */ + if (mbslen != 3 || state.__count != 4) + memset (&state, 0, sizeof (mbstate_t)); if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state)) == (size_t)-1) goto input_failure; @@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap), break; } nread += n; + /* Handle high surrogate */ + if (mbslen != 3 || state.__count != 4) + width -= 1; if ((flags & SUPPRESS) == 0) { wcp += 1; diff --git a/newlib/libc/stdio/vfwscanf.c b/newlib/libc/stdio/vfwscanf.c index a317eae..9ef2bca 100644 --- a/newlib/libc/stdio/vfwscanf.c +++ b/newlib/libc/stdio/vfwscanf.c @@ -376,6 +376,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), wint_t wi; /* handy wint_t */ char *mbp = NULL; /* multibyte string pointer for %c %s %[ */ size_t nconv; /* number of bytes in mb. conversion */ + char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */ char *cp; short *sp; @@ -458,13 +459,15 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), _p = _p0; \ _w; \ }) + /* For char output, check if there's room for at least MB_CUR_MAX + characters. 
*/ #define realloc_m_ptr(_type, _p, _p0, _p_p, _w) \ ({ \ size_t _nw = (_w); \ ptrdiff_t _dif = _p - _p0; \ if (_p_p && \ ((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX) \ - || (sizeof (_type) != 1 && _dif == _nw))) \ + || _dif >= _nw)) \ { \ _p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type)); \ if (!_p0) \ @@ -925,7 +928,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), #endif if (flags & SUPPRESS) - ; + mbp = mbbuf; #ifdef _WANT_IO_POSIX_EXTENSIONS else if (flags & MALLOC) mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32); @@ -934,16 +937,19 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), mbp = GET_ARG(N, ap, char *); n = 0; memset ((_PTR)&mbs, '\0', sizeof (mbstate_t)); - while (width-- != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF) + while (width != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF) { -#ifdef _WANT_IO_POSIX_EXTENSIONS - mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz); -#endif + nconv = _wcrtomb_r (rptr, mbp, wi, &mbs); + if (nconv == (size_t) -1) + goto input_failure; + /* Ignore high surrogate in width counting */ + if (nconv != 0 || mbs.__count != -4) + width--; if (!(flags & SUPPRESS)) { - nconv = _wcrtomb_r (rptr, mbp, wi, &mbs); - if (nconv == (size_t) -1) - goto input_failure; +#ifdef _WANT_IO_POSIX_EXTENSIONS + mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz); +#endif mbp += nconv; } n++; @@ -1014,7 +1020,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), #endif if (flags & SUPPRESS) - ; + mbp = mbbuf; #ifdef _WANT_IO_POSIX_EXTENSIONS else if (flags & MALLOC) mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32); @@ -1024,13 +1030,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), n = 0; memset ((_PTR) &mbs, '\0', sizeof (mbstate_t)); while ((wi = _fgetwc_r (rptr, fp)) != WEOF - && width-- != 0 && INCCL (wi)) + && width != 0 && INCCL (wi)) { + nconv = _wcrtomb_r (rptr, mbp, wi, &mbs); + if (nconv == (size_t) -1) + goto input_failure; + /* Ignore high surrogate in width counting */ + if (nconv != 0 || mbs.__count != -4) + 
width--; if (!(flags & SUPPRESS)) { - nconv = _wcrtomb_r (rptr, mbp, wi, &mbs); - if (nconv == (size_t) -1) - goto input_failure; mbp += nconv; #ifdef _WANT_IO_POSIX_EXTENSIONS mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz); @@ -1101,7 +1110,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), #endif if (flags & SUPPRESS) - ; + mbp = mbbuf; #ifdef _WANT_IO_POSIX_EXTENSIONS else if (flags & MALLOC) mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32); @@ -1110,13 +1119,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap), mbp = GET_ARG(N, ap, char *); memset ((_PTR) &mbs, '\0', sizeof (mbstate_t)); while ((wi = _fgetwc_r (rptr, fp)) != WEOF - && width-- != 0 && !iswspace (wi)) + && width != 0 && !iswspace (wi)) { + nconv = wcrtomb(mbp, wi, &mbs); + if (nconv == (size_t)-1) + goto input_failure; + /* Ignore high surrogate in width counting */ + if (nconv != 0 || mbs.__count != -4) + width--; if (!(flags & SUPPRESS)) { - nconv = wcrtomb(mbp, wi, &mbs); - if (nconv == (size_t)-1) - goto input_failure; mbp += nconv; #ifdef _WANT_IO_POSIX_EXTENSIONS mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);