From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id D766C3858417; Tue, 14 Feb 2023 12:10:13 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org D766C3858417 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1676376613; bh=VEiQ3jz8Xolo7LtcAAcEVTGYtHfuBLYVY1cN8Zz6QkE=; h=From:To:Subject:Date:From; b=r1WC18yOQjE4WAGIqR2Qxk+bu41tY/GBM8HnU8w/tp+Cgf0BcmqVQ+MN1Q8aSFZU3 FH8A8ni81PhIm1JQeWx2hgmuxFiXA9q8NPbG7vLfjypmfNGupYZNOANYvI2bGeVd8y nBD0TXdMq27WlYPhT2g6Pt7Wn4ynrsJ2bR8K5dvw= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: regex: convert wchar_t to wint_t X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: 0321fb5736545b5535e640b99b550ed6249f0e0c X-Git-Newrev: 588624da2b0a921c7e72d12d19c2ac3321de0326 Message-Id: <20230214121013.D766C3858417@sourceware.org> Date: Tue, 14 Feb 2023 12:10:13 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3D588624da2b0= a921c7e72d12d19c2ac3321de0326 commit 588624da2b0a921c7e72d12d19c2ac3321de0326 Author: Corinna Vinschen AuthorDate: Tue Feb 14 13:02:15 2023 +0100 Commit: Corinna Vinschen CommitDate: Tue Feb 14 13:02:15 2023 +0100 Cygwin: regex: convert wchar_t to wint_t =20 - call mbrtowi instead of mbrtowc - drop Cygwin-only surrogate handling from wgetnext and xmbrtowc since it's encapsulated in mbrtowi. =20 Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/regex/regcomp.c | 25 +++++-------------------- winsup/cygwin/regex/regexec.c | 22 +++------------------- 2 files changed, 8 insertions(+), 39 deletions(-) diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c index a377e56fac22..aef104c1ea45 100644 --- a/winsup/cygwin/regex/regcomp.c +++ b/winsup/cygwin/regex/regcomp.c @@ -921,7 +921,7 @@ p_b_coll_elem(struct parse *p, struct cname *cp; int len; mbstate_t mbs; - wchar_t wc; + wint_t wc; size_t clen; =20 while (MORE() && !SEETWO(endc, ']')) @@ -935,7 +935,7 @@ p_b_coll_elem(struct parse *p, if (strncmp(cp->name, sp, len) =3D=3D 0 && cp->name[len] =3D=3D '\0') return(cp->code); /* known name */ memset(&mbs, 0, sizeof(mbs)); - if ((clen =3D mbrtowc(&wc, sp, len, &mbs)) =3D=3D len) + if ((clen =3D mbrtowi(&wc, sp, len, &mbs)) =3D=3D len) return (wc); /* single character */ else if (clen =3D=3D (size_t)-1 || clen =3D=3D (size_t)-2) SETERROR(REG_ILLSEQ); @@ -1119,8 +1119,7 @@ static wint_t wgetnext(struct parse *p) { mbstate_t mbs; - wchar_t wc; - wint_t ret; + wint_t wc; size_t n; =20 #ifdef __CYGWIN__ @@ -1136,29 +1135,15 @@ wgetnext(struct parse *p) return (wint_t) (unsigned char) *p->next++; #endif memset(&mbs, 0, sizeof(mbs)); - n =3D mbrtowc(&wc, p->next, p->end - p->next, &mbs); + n =3D mbrtowi(&wc, p->next, p->end - p->next, &mbs); if (n =3D=3D (size_t)-1 || n =3D=3D (size_t)-2) { SETERROR(REG_ILLSEQ); return (0); } - ret =3D wc; if (n =3D=3D 0) n =3D 1; - else if (sizeof (wchar_t) =3D=3D 2 && wc >=3D 0xd800 && wc <=3D 0xdbff) { - /* UTF-16 surrogate pair. Fetch second half and - compute UTF-32 value */ - size_t n2 =3D mbrtowc(&wc, p->next + n, - p->end - p->next - n, &mbs); - if (n2 =3D=3D 0 || n2 =3D=3D (size_t)-1 || n2 =3D=3D (size_t)-2) { - SETERROR(REG_ILLSEQ); - return (0); - } - ret =3D (((ret & 0x3ff) << 10) | (wc & 0x3ff)) - + 0x10000; - n +=3D n2; - } p->next +=3D n; - return (ret); + return (wc); } =20 static size_t diff --git a/winsup/cygwin/regex/regexec.c b/winsup/cygwin/regex/regexec.c index c400578b898d..94e95e65abcd 100644 --- a/winsup/cygwin/regex/regexec.c +++ b/winsup/cygwin/regex/regexec.c @@ -68,9 +68,9 @@ static __inline size_t xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy) { size_t nr; - wchar_t wc; + wint_t wc; =20 - nr =3D mbrtowc(&wc, s, n, mbs); + nr =3D mbrtowi(&wc, s, n, mbs); if (wi !=3D NULL) *wi =3D wc; if (nr =3D=3D 0) @@ -80,24 +80,8 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t = *mbs, wint_t dummy) if (wi !=3D NULL) *wi =3D dummy; return (1); - } else { - if (sizeof (wchar_t) =3D=3D 2 && wc >=3D 0xd800 && wc <=3D 0xdbff) { - /* UTF-16 surrogate pair. Fetch second half and - compute UTF-32 value */ - size_t n2 =3D mbrtowc(&wc, s + nr, n - nr, mbs); - if (n2 =3D=3D 0 || n2 =3D=3D (size_t)-1 || n2 =3D=3D (size_t)-2) { - memset(mbs, 0, sizeof(*mbs)); - if (wi !=3D NULL) - *wi =3D dummy; - return (1); - } - if (wi !=3D NULL) - *wi =3D (((*wi & 0x3ff) << 10) | (wc & 0x3ff)) - + 0x10000; - nr +=3D n2; - } + } else return (nr); - } } =20 static __inline size_t