From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id E6E633858428; Mon, 31 Jul 2023 20:52:18 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E6E633858428 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1690836738; bh=GBFMTIe8CRNG/BhbfU9RoEF8nnBHMMiP6o5KemMHaYQ=; h=From:To:Subject:Date:From; b=f150VG/l/rzX5eEw78+CDnLnr9WLnsA6CI33n2m1dK4SFHuRNbcBJkr6KbKfkDNpc iyzKqrkxEyxjbTfllOznQpSbJ0DfQ985koTJ1Bi2NPtac/ZxI1D33k9yVcI32AJ2Hp JH5WYV6Ftj0cGLgVNERTsqKPocEXlf/qTQ8pxGm0= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: fix GB18030 support X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: 8a4318943875cd922601d34e54ce8a83ad2e733c X-Git-Newrev: 68bb3deabd0381465e18d41ae7f722cfe95ba3c8 Message-Id: <20230731205218.E6E633858428@sourceware.org> Date: Mon, 31 Jul 2023 20:52:18 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3D68bb3deabd0= 381465e18d41ae7f722cfe95ba3c8 commit 68bb3deabd0381465e18d41ae7f722cfe95ba3c8 Author: Corinna Vinschen AuthorDate: Fri Jul 28 23:33:52 2023 +0200 Commit: Corinna Vinschen CommitDate: Mon Jul 31 22:39:09 2023 +0200 Cygwin: fix GB18030 support =20 The changes to support GB18030 were insufficient and the underlying Windows conversion functions just failed. Fix how the Windows functions are called for GB18030. =20 Fixes: 5da71b605995 ("Cygwin: add support for GB18030 codeset") Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/strfuncs.cc | 150 ++++++++++++++++++++++++++++++++++++++++++= +++- 1 file changed, 148 insertions(+), 2 deletions(-) diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 76b7216cc3a2..43ce93ed22da 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -331,7 +331,55 @@ __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar= , mbstate_t *state) extern "C" int __gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *sta= te) { - return __db_wctomb (r,s, wchar, 54936); + int ret; + wchar_t wres[2]; + + if (s =3D=3D NULL) + return 0; + + if (state->__count =3D=3D 0) + { + if (wchar <=3D 0x7f) + { + *s =3D (char) wchar; + return 1; + } + + if (wchar >=3D 0xd800 && wchar <=3D 0xdbff) + { + /* First half of a surrogate pair */ + state->__count =3D 18030; + state->__value.__wch =3D wchar; + return 0; + } + ret =3D WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, &wchar, 1,= s, + 4, NULL, NULL); + if (ret > 0) + return ret; + goto ilseq; + } + else if (state->__count =3D=3D 18030 && state->__value.__wch >=3D 0xd800 + && state->__value.__wch <=3D 0xdbff) + { + if (wchar >=3D 0xdc00 && wchar <=3D 0xdfff) + { + /* Create multibyte sequence from full surrogate pair. */ + wres[0] =3D state->__value.__wch; + wres[1] =3D wchar; + ret =3D WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, wres, 2, s, 4, + NULL, NULL); + if (ret > 0) + { + state->__count =3D 0; + return ret; + } + } +ilseq: + _REENT_ERRNO(r) =3D EILSEQ; + return -1; + } + _REENT_ERRNO(r) =3D EINVAL; + return -1; } =20 extern "C" int @@ -495,7 +543,105 @@ extern "C" int __gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, mbstate_t *state) { - return __db_mbtowc (r, pwc, s, n, 54936, state); + wchar_t wres[2], dummy; + unsigned char ch; + int ret, len, ocount; + size_t ncopy; + + if (state->__count < 0 || (state->__count > (int) sizeof state->__value.= __wchb + && state->__count !=3D 18030)) + { + errno =3D EINVAL; + return -1; + } + + if (s =3D=3D NULL) + { + s =3D ""; + n =3D 1; + pwc =3D NULL; + } + + if (state->__count =3D=3D 18030) + { + /* Return second half of the surrogate pair */ + *pwc =3D state->__value.__wch; + state->__count =3D 0; + return 1; + } + + ncopy =3D MIN (MIN (n, MB_CUR_MAX), + sizeof state->__value.__wchb - state->__count); + memcpy (state->__value.__wchb + state->__count, s, ncopy); + ocount =3D state->__count; + state->__count +=3D ncopy; + s =3D (char *) state->__value.__wchb; + n =3D state->__count; + + if (n =3D=3D 0) /* Incomplete multibyte sequence */ + return -2; + + if (!pwc) + pwc =3D &dummy; + + /* Check if input is a valid GB18030 char (per FreeBSD): + * Single byte: [00-7f] + * Two byte: [81-fe][40-7e,80-fe] + * Four byte: [81-fe][30-39][81-fe][30-39] + */ + ch =3D *(unsigned char *) s; + if (ch <=3D 0x7f) + { + *pwc =3D ch; + state->__count =3D 0; + return ch ? 1 : 0; + } + if (ch >=3D 0x81 && ch <=3D 0xfe) + { + if (n < 2) + return -2; + ch =3D (unsigned char) s[1]; + if ((ch >=3D 0x40 && ch <=3D 0x7e) || (ch >=3D 0x80 && ch <=3D 0xfe)) + len =3D 2; + else if (ch >=3D 0x30 && ch <=3D 0x39) + { + if (n < 3) + return -2; + ch =3D (unsigned char) s[2]; + if (ch < 0x81 || ch > 0xfe) + goto ilseq; + if (n < 4) + return -2; + ch =3D (unsigned char) s[3]; + if (ch < 0x30 || ch > 0x39) + goto ilseq; + len =3D 4; + } + else + goto ilseq; + } + else + goto ilseq; + ret =3D MultiByteToWideChar (54936, MB_ERR_INVALID_CHARS, s, len, wres, = 2); + if (ret) + { + *pwc =3D wres[0]; + if (ret =3D=3D 2) + { + /* Surrogate pair. Store second half for later and return + first half. Return real count - 1, return 1 when the second + half of the pair is returned in the next run. */ + state->__count =3D 18030; + state->__value.__wch =3D wres[1]; + --len; + } + else + state->__count =3D 0; + return len - ocount; + } +ilseq: + _REENT_ERRNO(r) =3D EILSEQ; + return -1; } =20 extern "C" int