From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id DD7EA38582BE; Wed, 2 Aug 2023 15:04:14 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DD7EA38582BE DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1690988654; bh=iFtXHCg3Id3g9urRrJEIidKA1cAxmD5cb9I50abTewY=; h=From:To:Subject:Date:From; b=h0Wdtc3P+pjZfnzg6Yv1MtoER6GkiiE2orTNf5yTjFuSwT+wazqKMpnF0fKfnygra QR2l0ONAz47lcjTTHI3ovtg38ci8W/7RrQJY0YZZymAjho2AYpQaTzXJ3yQv5DgsYW ysEPbl5d5CM71hnJMO7v1dMnyHAY2kBh1WKABCew= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8 X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: 290b56a87947620f171f65190616116fc1e0082c X-Git-Newrev: c49bc478b4a7e7d00a0c8540b8d6b6e99453443e Message-Id: <20230802150414.DD7EA38582BE@sourceware.org> Date: Wed, 2 Aug 2023 15:04:14 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3Dc49bc478b4a= 7e7d00a0c8540b8d6b6e99453443e commit c49bc478b4a7e7d00a0c8540b8d6b6e99453443e Author: Corinna Vinschen AuthorDate: Wed Aug 2 16:55:52 2023 +0200 Commit: Corinna Vinschen CommitDate: Wed Aug 2 16:56:24 2023 +0200 Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8 =20 Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/include/cygwin/version.h | 3 +- winsup/cygwin/include/uchar.h | 14 ++- winsup/cygwin/release/3.5.0 | 2 +- winsup/cygwin/strfuncs.cc | 166 +++++++++++++++++++++++++++++= ++++ winsup/doc/new-features.xml | 6 +- 5 files changed, 184 insertions(+), 7 deletions(-) diff --git a/winsup/cygwin/include/cygwin/version.h b/winsup/cygwin/include= /cygwin/version.h index 7bc3e5ec3b25..833de646c563 100644 --- a/winsup/cygwin/include/cygwin/version.h +++ 
b/winsup/cygwin/include/cygwin/version.h @@ -482,12 +482,13 @@ details. */ 346: (Belatedly) add posix_spawn_file_actions_addchdir_np, posix_spawn_file_actions_addfchdir_np. 347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. + 348: Add c8rtomb, mbrtoc8. =20 Note that we forgot to bump the api for ualarm, strtoll, strtoull, sigaltstack, sethostname. */ =20 #define CYGWIN_VERSION_API_MAJOR 0 -#define CYGWIN_VERSION_API_MINOR 346 +#define CYGWIN_VERSION_API_MINOR 348 =20 /* There is also a compatibity version number associated with the shared m= emory regions. It is incremented when incompatible changes are made to the s= hared diff --git a/winsup/cygwin/include/uchar.h b/winsup/cygwin/include/uchar.h index bf865ff16e7f..ed548ac7360e 100644 --- a/winsup/cygwin/include/uchar.h +++ b/winsup/cygwin/include/uchar.h @@ -4,8 +4,11 @@ #include #include =20 -typedef __uint16_t char16_t; -typedef __uint32_t char32_t; +/* Either C2x or if C++ doesn't already define char8_t */ +#if __ISO_C_VISIBLE >=3D 2020 && !defined (__cpp_char8_t) +typedef unsigned char char8_t; +#endif + /* C++11 already defines those types. 
*/ #if !defined (__cplusplus) || (__cplusplus - 0 < 201103L) typedef __uint_least16_t char16_t; @@ -14,6 +17,13 @@ typedef __uint_least32_t char32_t; =20 __BEGIN_DECLS =20 +/* Either C2x or if C++ defines char8_t */ +#if __ISO_C_VISIBLE >=3D 2020 || defined (__cpp_char8_t) +size_t c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict); +size_t mbrtoc8(char8_t * __restrict, const char * __restrict, size_t, + mbstate_t * __restrict); +#endif + size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict); size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0 index 7c27e1bb8ca7..d71de50de536 100644 --- a/winsup/cygwin/release/3.5.0 +++ b/winsup/cygwin/release/3.5.0 @@ -27,7 +27,7 @@ What's new: - New API calls: posix_spawn_file_actions_addchdir_np, posix_spawn_file_actions_addfchdir_np. =20 -- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. +- New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32. =20 What changed: ------------- diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc index 770d40ed8f2f..cbcd0ec464ee 100644 --- a/winsup/cygwin/strfuncs.cc +++ b/winsup/cygwin/strfuncs.cc @@ -155,6 +155,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps) return wcrtomb (s, (wchar_t) wc, ps); } =20 +extern "C" size_t +c8rtomb (char *s, char8_t c8, mbstate_t *ps) +{ + struct _reent *reent =3D _REENT; + char32_t wc; + + if (ps =3D=3D NULL) + { + _REENT_CHECK_MISC(reent); + ps =3D &(_REENT_MBRTOWC_STATE(reent)); + } + + if (s =3D=3D NULL) + { + ps->__count =3D 0; + return 1; + } + if ((ps->__count & 0xff00) !=3D 0xc800) + { + switch (c8) + { + case 0 ... 0x7f: /* single octet */ + ps->__count =3D 0; + wc =3D c8; + break; + case 0xc2 ... 0xf4: /* valid lead byte */ + ps->__count =3D 0xc801; + ps->__value.__wchb[0] =3D c8; + return 0; + default: + goto ilseq; + } + } + else + { + /* We already collected something... 
*/ + int idx =3D ps->__count & 0x3; + char8_t &c1 =3D ps->__value.__wchb[0]; + char8_t &c2 =3D ps->__value.__wchb[1]; + char8_t &c3 =3D ps->__value.__wchb[2]; + + switch (idx) + { + case 1: + /* Annoyingly complex check for validity for 2nd octet. */ + if (c8 <=3D 0x7f || c8 >=3D 0xc0) + goto ilseq; + if (c1 =3D=3D 0xe0 && c8 <=3D 0x9f) + goto ilseq; + if (c1 =3D=3D 0xed && c8 >=3D 0xa0) + goto ilseq; + if (c1 =3D=3D 0xf0 && c8 <=3D 0x8f) + goto ilseq; + if (c1 =3D=3D 0xf4 && c8 >=3D 0x90) + goto ilseq; + if (c1 >=3D 0xe0) + { + ps->__count =3D 0xc802; + c2 =3D c8; + return 0; + } + wc =3D ((c1 & 0x1f) << 6) + | (c8 & 0x3f); + break; + case 2: + if (c8 <=3D 0x7f || c8 >=3D 0xc0) + goto ilseq; + if (c1 >=3D 0xf0) + { + ps->__count =3D 0xc803; + c3 =3D c8; + return 0; + } + wc =3D ((c1 & 0x0f) << 12) + | ((c2 & 0x3f) << 6) + | (c8 & 0x3f); + break; + case 3: + if (c8 <=3D 0x7f || c8 >=3D 0xc0) + goto ilseq; + wc =3D ((c1 & 0x07) << 18) + | ((c2 & 0x3f) << 12) + | ((c3 & 0x3f) << 6) + | (c8 & 0x3f); + break; + default: /* Shouldn't happen */ + goto ilseq; + } + } + ps->__count =3D 0; + return c32rtomb (s, wc, ps); +ilseq: + ps->__count =3D 0; + _REENT_ERRNO(reent) =3D EILSEQ; + return (size_t)(-1); +} + extern "C" size_t mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps) { @@ -245,6 +342,75 @@ ilseq: return (size_t)(-1); } =20 +extern "C" size_t +mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps) +{ + struct _reent *reent =3D _REENT; + size_t len; + char32_t wc; + + if (ps =3D=3D NULL) + { + _REENT_CHECK_MISC(reent); + ps =3D &(_REENT_MBRTOWC_STATE(reent)); + } + + if (s =3D=3D NULL) + { + if (ps) + ps->__count =3D 0; + return 1; + } + else if ((ps->__count & 0xff00) =3D=3D 0xc800) + { + /* Return next utf-8 octet in line. 
*/ + int idx =3D ps->__count & 0x3; + + if (pc8) + *pc8 =3D ps->__value.__wchb[--idx]; + if (idx =3D=3D 0) + ps->__count =3D 0; + return -3; + } + len =3D mbrtoc32 (&wc, s, n, ps); + if (len > 0) + { + /* octets stored back to front for easier indexing */ + switch (wc) + { + case 0 ... 0x7f: + ps->__value.__wchb[0] =3D wc; + ps->__count =3D 0; + break; + case 0x80 ... 0x7ff: + ps->__value.__wchb[1] =3D 0xc0 | ((wc & 0x7c0) >> 6); + ps->__value.__wchb[0] =3D 0x80 | (wc & 0x3f); + ps->__count =3D 0xc800 | 1; + break; + case 0x800 ... 0xffff: + ps->__value.__wchb[2] =3D 0xe0 | ((wc & 0xf000) >> 12); + ps->__value.__wchb[1] =3D 0x80 | ((wc & 0xfc0) >> 6); + ps->__value.__wchb[0] =3D 0x80 | (wc & 0x3f); + ps->__count =3D 0xc800 | 2; + break; + case 0x10000 ... 0x10ffff: + ps->__value.__wchb[3] =3D 0xf0 | ((wc & 0x1c0000) >> 18); + ps->__value.__wchb[2] =3D 0x80 | ((wc & 0x3f000) >> 12); + ps->__value.__wchb[1] =3D 0x80 | ((wc & 0xfc0) >> 6); + ps->__value.__wchb[0] =3D 0x80 | (wc & 0x3f); + ps->__count =3D 0xc800 | 3; + break; + default: + ps->__count =3D 0; + _REENT_ERRNO(reent) =3D EILSEQ; + return (size_t)(-1); + } + if (pc8) + *pc8 =3D ps->__value.__wchb[ps->__count & 0x3]; + } + return len; +} + extern "C" size_t mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_= t *ps) { diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml index b6d3e3a30a36..14644aa85ebe 100644 --- a/winsup/doc/new-features.xml +++ b/winsup/doc/new-features.xml @@ -46,12 +46,12 @@ Add support for GB18030 codeset. =20 -- New API calls: posix_spawn_file_actions_addchdir_np, - posix_spawn_file_actions_addfchdir_np. +New API calls: posix_spawn_file_actions_addchdir_np, +posix_spawn_file_actions_addfchdir_np. =20 -- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32. +New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32. =20