From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1123) id E8DC83858C62; Mon, 28 Nov 2022 15:36:33 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E8DC83858C62 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable From: Michael Matz To: bfd-cvs@sourceware.org Subject: [binutils-gdb] Special case more simple patterns X-Act-Checkin: binutils-gdb X-Git-Author: Michael Matz X-Git-Refname: refs/heads/master X-Git-Oldrev: 4a6bdfb9baa27e29151c7e97ae2abbe902f53638 X-Git-Newrev: 049522cae9798e51dd0c58566a9a2c61ba9100a9 Message-Id: <20221128153633.E8DC83858C62@sourceware.org> Date: Mon, 28 Nov 2022 15:36:33 +0000 (GMT) X-BeenThere: binutils-cvs@sourceware.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Binutils-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 28 Nov 2022 15:36:34 -0000 https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3D049522cae979= 8e51dd0c58566a9a2c61ba9100a9 commit 049522cae9798e51dd0c58566a9a2c61ba9100a9 Author: Michael Matz Date: Tue Nov 22 15:24:14 2022 +0100 Special case more simple patterns =20 fnmatch is slow, so avoiding it in more cases is good. This implements a more generic version of match_simple_wild which needs some pre-processing of patterns. In particular it supports patterns of the form PREFIX*SUFFIX (where all parts are optional), i.e. a superset of what's handled now. Most section matchers are of this form and hence don't need any calls to fnmatch anymore. =20 We retain the implementation of match_simple_wild for the filename matchers (they aren't called often enough to matter). 
Diff: --- ld/ld.h | 3 +- ld/ldlang.c | 125 ++++++++++++++++++++++++++++++++++++++++++++------------= ---- 2 files changed, 95 insertions(+), 33 deletions(-) diff --git a/ld/ld.h b/ld/ld.h index 2ac9f469d04..2a95e14e3b8 100644 --- a/ld/ld.h +++ b/ld/ld.h @@ -98,8 +98,9 @@ struct wildcard_spec { const char *name; struct name_list *exclude_name_list; - sort_type sorted; struct flag_info *section_flag_list; + size_t namelen, prefixlen, suffixlen; + sort_type sorted; }; =20 struct wildcard_list diff --git a/ld/ldlang.c b/ld/ldlang.c index 03daba6ef7f..5fc55dcd5dd 100644 --- a/ld/ldlang.c +++ b/ld/ldlang.c @@ -169,14 +169,75 @@ stat_alloc (size_t size) return obstack_alloc (&stat_obstack, size); } =20 +/* Code for handling simple wildcards without going through fnmatch, + which can be expensive because of charset translations etc. */ + +/* A simple wild is a literal string followed by a single '*', + where the literal part is at least 4 characters long. */ + +static bool +is_simple_wild (const char *name) +{ + size_t len =3D strcspn (name, "*?["); + return len >=3D 4 && name[len] =3D=3D '*' && name[len + 1] =3D=3D '\0'; +} + +static bool +match_simple_wild (const char *pattern, const char *name) +{ + /* The first four characters of the pattern are guaranteed valid + non-wildcard characters. So we can go faster. */ + if (pattern[0] !=3D name[0] || pattern[1] !=3D name[1] + || pattern[2] !=3D name[2] || pattern[3] !=3D name[3]) + return false; + + pattern +=3D 4; + name +=3D 4; + while (*pattern !=3D '*') + if (*name++ !=3D *pattern++) + return false; + + return true; +} + static int name_match (const char *pattern, const char *name) { + if (is_simple_wild (pattern)) + return !match_simple_wild (pattern, name); if (wildcardp (pattern)) return fnmatch (pattern, name, 0); return strcmp (pattern, name); } =20 +/* Given an analyzed wildcard_spec SPEC, match it against NAME, + returns zero on a match, non-zero if there's no match. 
*/ + +static int +spec_match (const struct wildcard_spec *spec, const char *name) +{ + size_t nl =3D spec->namelen; + size_t pl =3D spec->prefixlen; + size_t sl =3D spec->suffixlen; + int r; + if (pl && (r =3D memcmp (spec->name, name, pl))) + return r; + if (sl) + { + size_t inputlen =3D strlen (name); + if (inputlen < sl) + return 1; + r =3D memcmp (spec->name + nl - sl, name + inputlen - sl, sl); + if (r) + return r; + } + if (nl =3D=3D pl + sl + 1 && spec->name[pl] =3D=3D '*') + return 0; + else if (nl > pl) + return fnmatch (spec->name + pl, name + pl, 0); + return name[nl]; +} + static char * ldirname (const char *name) { @@ -349,7 +410,7 @@ walk_wild_section_general (lang_wild_statement_type *pt= r, { const char *sname =3D bfd_section_name (s); =20 - skip =3D name_match (sec->spec.name, sname) !=3D 0; + skip =3D spec_match (&sec->spec, sname) !=3D 0; } =20 if (!skip) @@ -397,37 +458,6 @@ find_section (lang_input_statement_type *file, return cb_data.found_section; } =20 -/* Code for handling simple wildcards without going through fnmatch, - which can be expensive because of charset translations etc. */ - -/* A simple wild is a literal string followed by a single '*', - where the literal part is at least 4 characters long. */ - -static bool -is_simple_wild (const char *name) -{ - size_t len =3D strcspn (name, "*?["); - return len >=3D 4 && name[len] =3D=3D '*' && name[len + 1] =3D=3D '\0'; -} - -static bool -match_simple_wild (const char *pattern, const char *name) -{ - /* The first four characters of the pattern are guaranteed valid - non-wildcard characters. So we can go faster. */ - if (pattern[0] !=3D name[0] || pattern[1] !=3D name[1] - || pattern[2] !=3D name[2] || pattern[3] !=3D name[3]) - return false; - - pattern +=3D 4; - name +=3D 4; - while (*pattern !=3D '*') - if (*name++ !=3D *pattern++) - return false; - - return true; -} - /* Return the numerical value of the init_priority attribute from section name NAME. 
*/ =20 @@ -821,6 +851,24 @@ wild_spec_can_overlap (const char *name1, const char *= name2) return memcmp (name1, name2, min_prefix_len) =3D=3D 0; } =20 +/* Like strcspn() but start to look from the end to beginning of + S. Returns the length of the suffix of S consisting entirely + of characters not in REJECT. */ + +static size_t +rstrcspn (const char *s, const char *reject) +{ + size_t len =3D strlen (s), sufflen =3D 0; + while (len--) + { + char c =3D s[len]; + if (strchr (reject, c) !=3D 0) + break; + sufflen++; + } + return sufflen; +} + /* Select specialized code to handle various kinds of wildcard statements. */ =20 @@ -840,6 +888,19 @@ analyze_walk_wild_section_handler (lang_wild_statement= _type *ptr) ptr->handler_data[3] =3D NULL; ptr->tree =3D NULL; =20 + for (sec =3D ptr->section_list; sec !=3D NULL; sec =3D sec->next) + { + if (sec->spec.name) + { + sec->spec.namelen =3D strlen (sec->spec.name); + sec->spec.prefixlen =3D strcspn (sec->spec.name, "?*["); + sec->spec.suffixlen =3D rstrcspn (sec->spec.name + sec->spec.prefixlen, + "?*]"); + } + else + sec->spec.namelen =3D sec->spec.prefixlen =3D sec->spec.suffixlen =3D 0; + } + /* Count how many wildcard_specs there are, and how many of those actually use wildcards in the name. Also, bail out if any of the wildcard names are NULL. (Can this actually happen?