public inbox for newlib-cvs@sourceware.org
help / color / mirror / Atom feed
* [newlib-cygwin] newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t
@ 2017-12-01 16:18 Corinna Vinschen
  0 siblings, 0 replies; only message in thread
From: Corinna Vinschen @ 2017-12-01 16:18 UTC (permalink / raw)
  To: newlib-cvs

https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=a49209d2bc84644cba75a68b1104d89a810aecb1

commit a49209d2bc84644cba75a68b1104d89a810aecb1
Author: Corinna Vinschen <corinna@vinschen.de>
Date:   Fri Dec 1 17:18:26 2017 +0100

    newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t
    
    * vfscanf: per POSIX, if the target type is wchar_t, the width is
      counted in (multibyte) characters, not in bytes.
    
    * vfscanf: Handle UTF-8 multibyte sequences converted to surrogate
      pairs on UTF-16 systems.
    
    * vfwscanf: Don't count high surrogates in input against the field
      width.  Per POSIX, input is counted in characters, so a surrogate
      pair counts as a single character.
    
    Signed-off-by: Corinna Vinschen <corinna@vinschen.de>

Diff:
---
 newlib/libc/stdio/vfscanf.c  | 28 +++++++++++++++++++------
 newlib/libc/stdio/vfwscanf.c | 50 +++++++++++++++++++++++++++-----------------
 2 files changed, 53 insertions(+), 25 deletions(-)

diff --git a/newlib/libc/stdio/vfscanf.c b/newlib/libc/stdio/vfscanf.c
index e8e4dab..f90079d 100644
--- a/newlib/libc/stdio/vfscanf.c
+++ b/newlib/libc/stdio/vfscanf.c
@@ -488,10 +488,15 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
       _p = _p0;								\
       _w;								\
     })
+  /* For systems with wchar_t == 2 (UTF-16) check if there's room for
+     at least 2 wchar_t's (surrogate pairs). */
   #define realloc_m_ptr(_type, _p, _p0, _p_p, _w)			\
     ({									\
       size_t _nw = (_w);						\
-      if (_p_p && _p - _p0 == _nw)					\
+      ptrdiff_t _dif = _p - _p0;					\
+      if (_p_p &&							\
+	  ((sizeof (_type) == 2 && _dif >= _nw - 1)			\
+	   || _dif >= _nw))						\
 	{								\
 	  _p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type));			\
 	  if (!_p0)							\
@@ -499,7 +504,7 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
 	      nassigned = EOF;						\
 	      goto match_failure;					\
 	    }								\
-	  _p = _p0 + _nw;						\
+	  _p = _p0 + _dif;						\
 	  *_p_p = _p0;							\
 	  _nw <<= 1;							\
 	}								\
@@ -948,7 +953,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
 	      size_t wcp_siz = 0;
 #endif
               mbstate_t state;
-              memset (&state, 0, sizeof (mbstate_t));
               if (flags & SUPPRESS)
                 wcp = NULL;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
@@ -958,13 +962,17 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
               else
                 wcp = GET_ARG (N, ap, wchar_t *);
               n = 0;
-              while (width-- != 0)
+              while (width != 0)
                 {
                   if (n == MB_CUR_MAX)
                     goto input_failure;
                   buf[n++] = *fp->_p;
                   fp->_r -= 1;
                   fp->_p += 1;
+		  /* Got a high surrogate, allow low surrogate to slip
+		     through */
+		  if (mbslen != 3 || state.__count != 4)
+		    memset (&state, 0, sizeof (mbstate_t));
                   if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
                                                          == (size_t)-1)
                     goto input_failure; /* Invalid sequence */
@@ -973,6 +981,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
                   if (mbslen != (size_t)-2) /* Incomplete sequence */
                     {
                       nread += n;
+		      /* Handle high surrogate */
+		      if (mbslen != 3 || state.__count != 4)
+			width -= 1;
                       if (!(flags & SUPPRESS))
 			{
 #ifdef _WANT_IO_POSIX_EXTENSIONS
@@ -1122,7 +1133,6 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
 #endif
               /* Process %S and %ls placeholders */
               mbstate_t state;
-              memset (&state, 0, sizeof (mbstate_t));
               if (flags & SUPPRESS)
                 wcp = &wc;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
@@ -1139,7 +1149,10 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
                   buf[n++] = *fp->_p;
                   fp->_r -= 1;
                   fp->_p += 1;
-		  width--;
+		  /* Got a high surrogate, allow low surrogate to slip
+		     through */
+		  if (mbslen != 3 || state.__count != 4)
+		    memset (&state, 0, sizeof (mbstate_t));
                   if ((mbslen = _mbrtowc_r (rptr, wcp, buf, n, &state))
                                                         == (size_t)-1)
                     goto input_failure;
@@ -1154,6 +1167,9 @@ _DEFUN(__SVFSCANF_R, (rptr, fp, fmt0, ap),
                           break;
                         }
                       nread += n;
+		      /* Handle high surrogate */
+		      if (mbslen != 3 || state.__count != 4)
+			width -= 1;
                       if ((flags & SUPPRESS) == 0)
 			{
 			  wcp += 1;
diff --git a/newlib/libc/stdio/vfwscanf.c b/newlib/libc/stdio/vfwscanf.c
index a317eae..9ef2bca 100644
--- a/newlib/libc/stdio/vfwscanf.c
+++ b/newlib/libc/stdio/vfwscanf.c
@@ -376,6 +376,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
   wint_t wi;                    /* handy wint_t */
   char *mbp = NULL;             /* multibyte string pointer for %c %s %[ */
   size_t nconv;                 /* number of bytes in mb. conversion */
+  char mbbuf[MB_LEN_MAX];	/* temporary mb. character buffer */
 
   char *cp;
   short *sp;
@@ -458,13 +459,15 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
       _p = _p0;								\
       _w;								\
     })
+  /* For char output, check if there's room for at least MB_CUR_MAX
+     characters. */
   #define realloc_m_ptr(_type, _p, _p0, _p_p, _w)			\
     ({									\
       size_t _nw = (_w);						\
       ptrdiff_t _dif = _p - _p0;					\
       if (_p_p &&							\
 	  ((sizeof (_type) == 1 && _dif >= _nw - MB_CUR_MAX)		\
-	   || (sizeof (_type) != 1 && _dif == _nw)))			\
+	   || _dif >= _nw))						\
 	{								\
 	  _p0 = (_type *) realloc (_p0, (_nw << 1) * sizeof (_type));	\
 	  if (!_p0)							\
@@ -925,7 +928,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
 #endif
 
 	      if (flags & SUPPRESS)
-		;
+		mbp = mbbuf;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
 	      else if (flags & MALLOC)
 		mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@@ -934,16 +937,19 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
 		mbp = GET_ARG(N, ap, char *);
 	      n = 0;
 	      memset ((_PTR)&mbs, '\0', sizeof (mbstate_t));
-	      while (width-- != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
+	      while (width != 0 && (wi = _fgetwc_r (rptr, fp)) != WEOF)
 		{
-#ifdef _WANT_IO_POSIX_EXTENSIONS
-		  mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
-#endif
+		  nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
+		  if (nconv == (size_t) -1)
+		    goto input_failure;
+		  /* Ignore high surrogate in width counting */
+		  if (nconv != 0 || mbs.__count != -4)
+		    width--;
 		  if (!(flags & SUPPRESS))
 		    {
-		      nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
-		      if (nconv == (size_t) -1)
-			goto input_failure;
+#ifdef _WANT_IO_POSIX_EXTENSIONS
+		      mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
+#endif
 		      mbp += nconv;
 		    }
 		  n++;
@@ -1014,7 +1020,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
 #endif
 
 	      if (flags & SUPPRESS)
-		;
+		mbp = mbbuf;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
 	      else if (flags & MALLOC)
 		mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@@ -1024,13 +1030,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
 	      n = 0;
 	      memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
 	      while ((wi = _fgetwc_r (rptr, fp)) != WEOF
-		     && width-- != 0 && INCCL (wi))
+		     && width != 0 && INCCL (wi))
 		{
+		  nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
+		  if (nconv == (size_t) -1)
+		    goto input_failure;
+		  /* Ignore high surrogate in width counting */
+		  if (nconv != 0 || mbs.__count != -4)
+		    width--;
 		  if (!(flags & SUPPRESS))
 		    {
-		      nconv = _wcrtomb_r (rptr, mbp, wi, &mbs);
-		      if (nconv == (size_t) -1)
-			goto input_failure;
 		      mbp += nconv;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
 		      mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);
@@ -1101,7 +1110,7 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
 #endif
 
 	      if (flags & SUPPRESS)
-		;
+		mbp = mbbuf;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
 	      else if (flags & MALLOC)
 		mbp_siz = alloc_m_ptr (char, mbp, mbp0, mbp_p, 32);
@@ -1110,13 +1119,16 @@ _DEFUN(__SVFWSCANF_R, (rptr, fp, fmt0, ap),
 		mbp = GET_ARG(N, ap, char *);
 	      memset ((_PTR) &mbs, '\0', sizeof (mbstate_t));
 	      while ((wi = _fgetwc_r (rptr, fp)) != WEOF
-		     && width-- != 0 && !iswspace (wi))
+		     && width != 0 && !iswspace (wi))
 		{
+		  nconv = wcrtomb(mbp, wi, &mbs);
+		  if (nconv == (size_t)-1)
+		    goto input_failure;
+		  /* Ignore high surrogate in width counting */
+		  if (nconv != 0 || mbs.__count != -4)
+		    width--;
 		  if (!(flags & SUPPRESS))
 		    {
-		      nconv = wcrtomb(mbp, wi, &mbs);
-		      if (nconv == (size_t)-1)
-			goto input_failure;
 		      mbp += nconv;
 #ifdef _WANT_IO_POSIX_EXTENSIONS
 		      mbp_siz = realloc_m_ptr (char, mbp, mbp0, mbp_p, mbp_siz);


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2017-12-01 16:18 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-12-01 16:18 [newlib-cygwin] newlib: vf[w]scanf: Fix conversion multibyte <-> wchar_t Corinna Vinschen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).