From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2155) id A6599385841A; Wed, 1 Mar 2023 09:55:41 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org A6599385841A DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=sourceware.org; s=default; t=1677664541; bh=oImp/gWBa0iEb0tQR+P4oGynRA28hw11zvb73+OvsV4=; h=From:To:Subject:Date:From; b=MJUnuAHktihNTZYQ6CsTRseHVlhGwbcx6wYUhDiY13mhA5b9dC6cXaQ7q3hUIKl3o zfVqaN5ANmgSDV4cBfeyXK0VaBtJZYQNIEtSeKwnhNl7+z1QFOx/JQxaX9uXpOlE5F ZJd0j2upAQNi9fdnxFQcXpjyXApkhMR1XxXIgwiQ= Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Corinna Vinschen To: cygwin-cvs@sourceware.org Subject: [newlib-cygwin/main] Cygwin: fnmatch: support collating symbols in [. .] brackets X-Act-Checkin: newlib-cygwin X-Git-Author: Corinna Vinschen X-Git-Refname: refs/heads/main X-Git-Oldrev: 149cabea8220c7baf1185ccfaea03922bbfd390a X-Git-Newrev: c36064bbd0c50323c15842c89a0024c780983e87 Message-Id: <20230301095541.A6599385841A@sourceware.org> Date: Wed, 1 Mar 2023 09:55:41 +0000 (GMT) List-Id: https://sourceware.org/git/gitweb.cgi?p=3Dnewlib-cygwin.git;h=3Dc36064bbd0c50323c15842c89a0024c780983e87 commit c36064bbd0c50323c15842c89a0024c780983e87 Author: Corinna Vinschen AuthorDate: Wed Mar 1 10:54:00 2023 +0100 Commit: Corinna Vinschen CommitDate: Wed Mar 1 10:54:52 2023 +0100 Cygwin: fnmatch: support collating symbols in [. .] brackets =20 This requires quite a few changes in how fnmatch operates. It always operates on wint_t strings now, just like regex and glob, and it always keeps a pointer to the current character inside the string, rather than operating on a copy of a single character. =20 As a result, just drop the ifdef's for Cygwin. The code is non-portable now anyway... 
=20 Signed-off-by: Corinna Vinschen Diff: --- winsup/cygwin/libc/fnmatch.c | 288 ++++++++++++++++++++++++---------------= ---- 1 file changed, 158 insertions(+), 130 deletions(-) diff --git a/winsup/cygwin/libc/fnmatch.c b/winsup/cygwin/libc/fnmatch.c index d109c2124aba..8a229a142032 100644 --- a/winsup/cygwin/libc/fnmatch.c +++ b/winsup/cygwin/libc/fnmatch.c @@ -72,71 +72,77 @@ __FBSDID("$FreeBSD: head/lib/libc/gen/fnmatch.c 288309 = 2015-09-27 12:52:18Z jill #define RANGE_NOMATCH 0 #define RANGE_ERROR (-1) =20 -static int rangematch(const char *, wint_t, int, char **, mbstate_t *); +static int rangematch(const wint_t *, wint_t *, int, wint_t **, mbstate_t = *); =20 int -fnmatch(const char *pattern, const char *string, int flags) +fnmatch(const char *in_pattern, const char *in_string, int flags) { - const char *stringstart =3D string; - const char *bt_pattern, *bt_string; - mbstate_t patmbs, strmbs; + size_t pclen =3D strlen (in_pattern); + size_t sclen =3D strlen (in_string); + wint_t *pattern =3D (wint_t *) alloca ((pclen + 1) * sizeof (wint_t)); + wint_t *string =3D (wint_t *) alloca ((sclen + 1) * sizeof (wint_t)); + + const wint_t *stringstart =3D string; + const wint_t *bt_pattern, *bt_string; + mbstate_t patmbs =3D { 0 }; + mbstate_t strmbs =3D { 0 }; mbstate_t bt_patmbs, bt_strmbs; - char *newp; - char c; - wint_t pc, sc; - size_t pclen, sclen; + wint_t *newp; + wint_t *c; + wint_t *pc, *sc; + + pclen =3D mbsnrtowci (pattern, &in_pattern, (size_t) -1, pclen, &patmbs); + if (pclen =3D=3D (size_t) -1) + return (FNM_NOMATCH); + pattern[pclen] =3D '\0'; + sclen =3D mbsnrtowci (string, &in_string, (size_t) -1, sclen, &strmbs); + if (sclen =3D=3D (size_t) -1) + return (FNM_NOMATCH); + string[sclen] =3D '\0'; =20 bt_pattern =3D bt_string =3D NULL; for (;;) { - pclen =3D mbrtowi(&pc, pattern, MB_LEN_MAX, &patmbs); - if (pclen =3D=3D (size_t)-1 || pclen =3D=3D (size_t)-2) - return (FNM_NOMATCH); - pattern +=3D pclen; - sclen =3D mbrtowi(&sc, string, MB_LEN_MAX, 
&strmbs); - if (sclen =3D=3D (size_t)-1 || sclen =3D=3D (size_t)-2) { - sc =3D (unsigned char)*string; - sclen =3D 1; - memset(&strmbs, 0, sizeof(strmbs)); - } - switch (pc) { + pc =3D pattern++; + sc =3D string; + switch (*pc) { case EOS: - if ((flags & FNM_LEADING_DIR) && sc =3D=3D '/') + if ((flags & FNM_LEADING_DIR) && *sc =3D=3D '/') return (0); - if (sc =3D=3D EOS) + if (*sc =3D=3D EOS) return (0); goto backtrack; case '?': - if (sc =3D=3D EOS) + if (*sc =3D=3D EOS) return (FNM_NOMATCH); - if (sc =3D=3D '/' && (flags & FNM_PATHNAME)) + if (*sc =3D=3D '/' && (flags & FNM_PATHNAME)) goto backtrack; - if (sc =3D=3D '.' && (flags & FNM_PERIOD) && + if (*sc =3D=3D '.' && (flags & FNM_PERIOD) && (string =3D=3D stringstart || ((flags & FNM_PATHNAME) && *(string - 1) =3D=3D '/'))) goto backtrack; - string +=3D sclen; + ++string; break; case '*': - c =3D *pattern; + c =3D pattern; /* Collapse multiple stars. */ - while (c =3D=3D '*') - c =3D *++pattern; + while (*c =3D=3D '*') + *c =3D *++pattern; =20 - if (sc =3D=3D '.' && (flags & FNM_PERIOD) && + if (*sc =3D=3D '.' && (flags & FNM_PERIOD) && (string =3D=3D stringstart || ((flags & FNM_PATHNAME) && *(string - 1) =3D=3D '/'))) goto backtrack; =20 /* Optimize for pattern with * at end or before /. */ - if (c =3D=3D EOS) + if (*c =3D=3D EOS) if (flags & FNM_PATHNAME) return ((flags & FNM_LEADING_DIR) || - strchr(string, '/') =3D=3D NULL ? + wcichr(string, '/') =3D=3D NULL ? 0 : FNM_NOMATCH); else return (0); - else if (c =3D=3D '/' && flags & FNM_PATHNAME) { - if ((string =3D strchr(string, '/')) =3D=3D NULL) + else if (*c =3D=3D '/' && flags & FNM_PATHNAME) { + if ((string =3D wcichr(string, '/')) =3D=3D NULL) return (FNM_NOMATCH); break; } @@ -147,47 +153,46 @@ fnmatch(const char *pattern, const char *string, int = flags) * there is no way having it match more characters * can help us, given that we are already here. 
*/ - bt_pattern =3D pattern, bt_patmbs =3D patmbs; - bt_string =3D string, bt_strmbs =3D strmbs; + bt_pattern =3D pattern; + bt_patmbs =3D patmbs; + bt_string =3D string; + bt_strmbs =3D strmbs; break; case '[': - if (sc =3D=3D EOS) + if (*sc =3D=3D EOS) return (FNM_NOMATCH); - if (sc =3D=3D '/' && (flags & FNM_PATHNAME)) + if (*sc =3D=3D '/' && (flags & FNM_PATHNAME)) goto backtrack; - if (sc =3D=3D '.' && (flags & FNM_PERIOD) && + if (*sc =3D=3D '.' && (flags & FNM_PERIOD) && (string =3D=3D stringstart || ((flags & FNM_PATHNAME) && *(string - 1) =3D=3D '/'))) goto backtrack; =20 - switch (rangematch(pattern, sc, flags, &newp, - &patmbs)) { + int ret =3D rangematch(pattern, sc, flags, &newp, + &patmbs); + switch (ret) { case RANGE_ERROR: goto norm; - case RANGE_MATCH: - pattern =3D newp; - break; case RANGE_NOMATCH: goto backtrack; + default: /* > 0 ... case RANGE_MATCH */ + pattern =3D newp; + break; } - string +=3D sclen; + string +=3D ret; break; case '\\': if (!(flags & FNM_NOESCAPE)) { - pclen =3D mbrtowi(&pc, pattern, MB_LEN_MAX, - &patmbs); - if (pclen =3D=3D (size_t)-1 || pclen =3D=3D (size_t)-2) - return (FNM_NOMATCH); - pattern +=3D pclen; + pc =3D pattern++; } fallthrough; default: norm: - string +=3D sclen; - if (pc =3D=3D sc) + ++string; + if (*pc =3D=3D *sc) ; else if ((flags & FNM_CASEFOLD) && - (towlower(pc) =3D=3D towlower(sc))) + (towlower(*pc) =3D=3D towlower(*sc))) ; else { backtrack: @@ -199,22 +204,16 @@ fnmatch(const char *pattern, const char *string, int = flags) */ if (bt_pattern =3D=3D NULL) return (FNM_NOMATCH); - sclen =3D mbrtowi(&sc, bt_string, MB_LEN_MAX, - &bt_strmbs); - if (sclen =3D=3D (size_t)-1 || - sclen =3D=3D (size_t)-2) { - sc =3D (unsigned char)*bt_string; - sclen =3D 1; - memset(&bt_strmbs, 0, - sizeof(bt_strmbs)); - } - if (sc =3D=3D EOS) + sc =3D (wint_t *) bt_string; + if (*sc =3D=3D EOS) return (FNM_NOMATCH); - if (sc =3D=3D '/' && flags & FNM_PATHNAME) + if (*sc =3D=3D '/' && flags & FNM_PATHNAME) return 
(FNM_NOMATCH); - bt_string +=3D sclen; - pattern =3D bt_pattern, patmbs =3D bt_patmbs; - string =3D bt_string, strmbs =3D bt_strmbs; + ++bt_string; + pattern =3D (wint_t *) bt_pattern; + patmbs =3D bt_patmbs; + string =3D (wint_t *) bt_string; + strmbs =3D bt_strmbs; } break; } @@ -222,18 +221,46 @@ fnmatch(const char *pattern, const char *string, int = flags) /* NOTREACHED */ } =20 +/* Return value is either '\0', ':', '.', '=3D', or '[' if no class + expression found. cptr_p is set to the next character which needs + checking. */ +static inline wint_t +check_classes_expr(const wint_t **cptr_p, wint_t *classbuf, size_t classbu= fsize) +{ + const wint_t *ctype =3D NULL; + const wint_t *cptr =3D *cptr_p; + + if (*cptr =3D=3D '[' && + (cptr[1] =3D=3D ':' || cptr[1] =3D=3D '.' || cptr[1] =3D=3D '=3D')) { + ctype =3D ++cptr; + while (*++cptr && (*cptr !=3D *ctype || cptr[1] !=3D ']')) + ; + if (!*cptr) + return '\0'; + if (classbuf) { + const wint_t *class_p =3D ctype + 1; + size_t clen =3D cptr - class_p; + + if (clen < classbufsize) + *wcipncpy (classbuf, class_p, clen) =3D '\0'; + else + ctype =3D NULL; + } + cptr +=3D 2; /* Advance cptr to next char after class expr. */ + } + *cptr_p =3D cptr; + return ctype ? *ctype : '['; +} + static int -rangematch(const char *pattern, wint_t test, int flags, char **newp, +rangematch(const wint_t *pattern, wint_t *test, int flags, wint_t **newp, mbstate_t *patmbs) { int negate, ok; - wint_t c, c2; - size_t pclen; - const char *origpat; -#ifndef __CYGWIN__ - struct xlocale_collate *table =3D - (struct xlocale_collate*)__get_locale()->components[XLC_COLLATE]; -#endif + wint_t *c, *c2; + //size_t pclen; + const wint_t *origpat; + size_t tlen =3D next_unicode_char (test); =20 /* * A bracket expression starting with an unquoted circumflex @@ -245,8 +272,10 @@ rangematch(const char *pattern, wint_t test, int flags= , char **newp, if ( (negate =3D (*pattern =3D=3D '!' 
|| *pattern =3D=3D '^')) ) ++pattern; =20 - if (flags & FNM_CASEFOLD) - test =3D towlower(test); + if (flags & FNM_CASEFOLD) { + for (int idx =3D 0; idx < tlen; ++idx) + test[idx] =3D towlower(test[idx]); + } =20 /* * A right bracket shall lose its special meaning and represent @@ -256,6 +285,11 @@ rangematch(const char *pattern, wint_t test, int flags= , char **newp, ok =3D 0; origpat =3D pattern; for (;;) { + wint_t wclass[64], wclass2[64]; + char cclass[64]; + wint_t ctype; + size_t clen =3D 1, c2len =3D 1; + if (*pattern =3D=3D ']' && pattern > origpat) { pattern++; break; @@ -265,75 +299,69 @@ rangematch(const char *pattern, wint_t test, int flag= s, char **newp, return (RANGE_NOMATCH); } else if (*pattern =3D=3D '\\' && !(flags & FNM_NOESCAPE)) pattern++; - if (*pattern =3D=3D '[' && (pattern[1] =3D=3D ':' || pattern[1] =3D=3D '= .' - || pattern[1] =3D=3D '=3D')) { - const char ctype =3D *++pattern; - const char *class_p =3D ++pattern; - - while (*pattern - && (*pattern !=3D ctype || pattern[1] !=3D ']')) - ++pattern; - if (!*pattern) - return (RANGE_ERROR); - if (ctype =3D=3D ':') { /* named character class */ - size_t clen =3D pattern - class_p; - char class[clen + 1]; - - *stpncpy (class, class_p, clen) =3D '\0'; - if (iswctype (test, wctype (class))) - ok =3D 1; - } else if (ctype =3D=3D '=3D') { /* equivalence class */ - size_t elen =3D pattern - class_p; - char equiv[elen + 1]; - wint_t eqv; - - *stpncpy (equiv, class_p, elen) =3D '\0'; - if (mbrtowi(&eqv, equiv, elen, patmbs) =3D=3D elen - && is_unicode_equiv (test, eqv)) - ok =3D 1; - } - /* TODO: [. 
is just ignored for now */ - pattern +=3D 2; + switch (ctype =3D check_classes_expr (&pattern, wclass, 64)) { + case ':': + /* No worries, char classes are ASCII-only */ + wcitoascii (cclass, wclass); + if (iswctype (*test, wctype (cclass))) + ok =3D 1; continue; - + case '=3D': + if (wcilen (wclass) =3D=3D 1 && + is_unicode_equiv (*test, *wclass)) + ok =3D 1; + continue; + case '.': + if (!is_unicode_coll_elem (wclass)) + return (RANGE_NOMATCH); + c =3D wclass; + clen =3D wcilen (wclass); + break; + default: + c =3D (wint_t *) pattern++; + break; + } + if (flags & FNM_CASEFOLD) { + for (int idx =3D 0; idx < tlen; ++idx) + c[idx] =3D towlower(c[idx]); } - pclen =3D mbrtowi(&c, pattern, MB_LEN_MAX, patmbs); - if (pclen =3D=3D (size_t)-1 || pclen =3D=3D (size_t)-2) - return (RANGE_NOMATCH); - pattern +=3D pclen; - - if (flags & FNM_CASEFOLD) - c =3D towlower(c); =20 if (*pattern =3D=3D '-' && *(pattern + 1) !=3D EOS && *(pattern + 1) !=3D ']') { if (*++pattern =3D=3D '\\' && !(flags & FNM_NOESCAPE)) if (*pattern !=3D EOS) pattern++; - pclen =3D mbrtowi(&c2, pattern, MB_LEN_MAX, patmbs); - if (pclen =3D=3D (size_t)-1 || pclen =3D=3D (size_t)-2) - return (RANGE_NOMATCH); - pattern +=3D pclen; - if (c2 =3D=3D EOS) + const wint_t *orig_pattern =3D pattern; + switch (ctype =3D check_classes_expr (&pattern, wclass2, + 64)) { + case '.': + if (!is_unicode_coll_elem (wclass2)) + return (RANGE_NOMATCH); + c2 =3D wclass2; + c2len =3D wcilen (wclass2); + break; + default: + pattern =3D orig_pattern; + c2 =3D (wint_t *) pattern++; + } + if (*c2 =3D=3D EOS) return (RANGE_ERROR); =20 - if (flags & FNM_CASEFOLD) - c2 =3D towlower(c2); + if (flags & FNM_CASEFOLD) { + for (int idx =3D 0; idx < tlen; ++idx) + c2[idx] =3D towlower(c2[idx]); + } =20 -#ifdef __CYGWIN__ if ((!__get_current_collate_locale ()->win_locale[0]) ? -#else - if (table->__collate_load_error ? 
-#endif c <=3D test && test <=3D c2 : - __wcollate_range_cmp(c, test) <=3D 0 - && __wcollate_range_cmp(test, c2) <=3D 0 + __wscollate_range_cmp(c, test, clen, tlen) <=3D 0 + && __wscollate_range_cmp(test, c2, tlen, c2len) <=3D 0 ) ok =3D 1; - } else if (c =3D=3D test) + } else if (clen =3D=3D tlen && wcincmp (c, test, clen) =3D=3D 0) ok =3D 1; } =20 - *newp =3D (char *)pattern; - return (ok =3D=3D negate ? RANGE_NOMATCH : RANGE_MATCH); + *newp =3D (wint_t *) pattern; + return (ok =3D=3D negate ? RANGE_NOMATCH : tlen); }