public inbox for cygwin-cvs@sourceware.org help / color / mirror / Atom feed
From: Corinna Vinschen <corinna@sourceware.org> To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: fix GB18030 support Date: Mon, 31 Jul 2023 20:52:18 +0000 (GMT) [thread overview] Message-ID: <20230731205218.E6E633858428@sourceware.org> (raw) https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=68bb3deabd0381465e18d41ae7f722cfe95ba3c8 commit 68bb3deabd0381465e18d41ae7f722cfe95ba3c8 Author: Corinna Vinschen <corinna@vinschen.de> AuthorDate: Fri Jul 28 23:33:52 2023 +0200 Commit: Corinna Vinschen <corinna@vinschen.de> CommitDate: Mon Jul 31 22:39:09 2023 +0200 Cygwin: fix GB18030 support The changes to support GB18030 were insufficient and the underlying Windows conversion functions just failed. Fix how the Windows functions are called for GB18030. Fixes: 5da71b605995 ("Cygwin: add support for GB18030 codeset") Signed-off-by: Corinna Vinschen <corinna@vinschen.de> Diff: --- winsup/cygwin/strfuncs.cc | 150 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 2 deletions(-) diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 76b7216cc3a2..43ce93ed22da 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -331,7 +331,55 @@ __gbk_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) extern "C" int __gb18030_wctomb (struct _reent *r, char *s, wchar_t wchar, mbstate_t *state) { - return __db_wctomb (r,s, wchar, 54936); + int ret; + wchar_t wres[2]; + + if (s == NULL) + return 0; + + if (state->__count == 0) + { + if (wchar <= 0x7f) + { + *s = (char) wchar; + return 1; + } + + if (wchar >= 0xd800 && wchar <= 0xdbff) + { + /* First half of a surrogate pair */ + state->__count = 18030; + state->__value.__wch = wchar; + return 0; + } + ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, &wchar, 1, s, + 4, NULL, NULL); + if (ret > 0) + return ret; + goto ilseq; + } + else if (state->__count == 18030 && state->__value.__wch >= 0xd800 + && state->__value.__wch <= 
0xdbff) + { + if (wchar >= 0xdc00 && wchar <= 0xdfff) + { + /* Create multibyte sequence from full surrogate pair. */ + wres[0] = state->__value.__wch; + wres[1] = wchar; + ret = WideCharToMultiByte (54936, WC_ERR_INVALID_CHARS, wres, 2, s, 4, + NULL, NULL); + if (ret > 0) + { + state->__count = 0; + return ret; + } + } +ilseq: + _REENT_ERRNO(r) = EILSEQ; + return -1; + } + _REENT_ERRNO(r) = EINVAL; + return -1; } extern "C" int @@ -495,7 +543,105 @@ extern "C" int __gb18030_mbtowc (struct _reent *r, wchar_t *pwc, const char *s, size_t n, mbstate_t *state) { - return __db_mbtowc (r, pwc, s, n, 54936, state); + wchar_t wres[2], dummy; + unsigned char ch; + int ret, len, ocount; + size_t ncopy; + + if (state->__count < 0 || (state->__count > (int) sizeof state->__value.__wchb + && state->__count != 18030)) + { + errno = EINVAL; + return -1; + } + + if (s == NULL) + { + s = ""; + n = 1; + pwc = NULL; + } + + if (state->__count == 18030) + { + /* Return second half of the surrogate pair */ + *pwc = state->__value.__wch; + state->__count = 0; + return 1; + } + + ncopy = MIN (MIN (n, MB_CUR_MAX), + sizeof state->__value.__wchb - state->__count); + memcpy (state->__value.__wchb + state->__count, s, ncopy); + ocount = state->__count; + state->__count += ncopy; + s = (char *) state->__value.__wchb; + n = state->__count; + + if (n == 0) /* Incomplete multibyte sequence */ + return -2; + + if (!pwc) + pwc = &dummy; + + /* Check if input is a valid GB18030 char (per FreeBSD): + * Single byte: [00-7f] + * Two byte: [81-fe][40-7e,80-fe] + * Four byte: [81-fe][30-39][81-fe][30-39] + */ + ch = *(unsigned char *) s; + if (ch <= 0x7f) + { + *pwc = ch; + state->__count = 0; + return ch ? 
1 : 0; + } + if (ch >= 0x81 && ch <= 0xfe) + { + if (n < 2) + return -2; + ch = (unsigned char) s[1]; + if ((ch >= 0x40 && ch <= 0x7e) || (ch >= 0x80 && ch <= 0xfe)) + len = 2; + else if (ch >= 0x30 && ch <= 0x39) + { + if (n < 3) + return -2; + ch = (unsigned char) s[2]; + if (ch < 0x81 || ch > 0xfe) + goto ilseq; + if (n < 4) + return -2; + ch = (unsigned char) s[3]; + if (ch < 0x30 || ch > 0x39) + goto ilseq; + len = 4; + } + else + goto ilseq; + } + else + goto ilseq; + ret = MultiByteToWideChar (54936, MB_ERR_INVALID_CHARS, s, len, wres, 2); + if (ret) + { + *pwc = wres[0]; + if (ret == 2) + { + /* Surrogate pair. Store second half for later and return + first half. Return real count - 1, return 1 when the second + half of the pair is returned in the next run. */ + state->__count = 18030; + state->__value.__wch = wres[1]; + --len; + } + else + state->__count = 0; + return len - ocount; + } +ilseq: + _REENT_ERRNO(r) = EILSEQ; + return -1; } extern "C" int
reply other threads:[~2023-07-31 20:52 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230731205218.E6E633858428@sourceware.org \ --to=corinna@sourceware.org \ --cc=cygwin-cvs@sourceware.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).