From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <matz@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1123)
 id E8DC83858C62; Mon, 28 Nov 2022 15:36:33 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E8DC83858C62
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: quoted-printable
From: Michael Matz <matz@sourceware.org>
To: bfd-cvs@sourceware.org
Subject: [binutils-gdb] Special case more simple patterns
X-Act-Checkin: binutils-gdb
X-Git-Author: Michael Matz <matz@suse.de>
X-Git-Refname: refs/heads/master
X-Git-Oldrev: 4a6bdfb9baa27e29151c7e97ae2abbe902f53638
X-Git-Newrev: 049522cae9798e51dd0c58566a9a2c61ba9100a9
Message-Id: <20221128153633.E8DC83858C62@sourceware.org>
Date: Mon, 28 Nov 2022 15:36:33 +0000 (GMT)
X-BeenThere: binutils-cvs@sourceware.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Binutils-cvs mailing list <binutils-cvs.sourceware.org>
List-Unsubscribe: <https://sourceware.org/mailman/options/binutils-cvs>,
 <mailto:binutils-cvs-request@sourceware.org?subject=unsubscribe>
List-Archive: <https://sourceware.org/pipermail/binutils-cvs/>
List-Help: <mailto:binutils-cvs-request@sourceware.org?subject=help>
List-Subscribe: <https://sourceware.org/mailman/listinfo/binutils-cvs>,
 <mailto:binutils-cvs-request@sourceware.org?subject=subscribe>
X-List-Received-Date: Mon, 28 Nov 2022 15:36:34 -0000

https://sourceware.org/git/gitweb.cgi?p=3Dbinutils-gdb.git;h=3D049522cae979=
8e51dd0c58566a9a2c61ba9100a9

commit 049522cae9798e51dd0c58566a9a2c61ba9100a9
Author: Michael Matz <matz@suse.de>
Date:   Tue Nov 22 15:24:14 2022 +0100

    Special case more simple patterns
   =20
    fnmatch is slow, so avoiding it in more cases is good.  This implements
    a more generic version of match_simple_wild which needs some
    pre-processing of patterns.  In particular it supports patterns of the
    form PREFIX*SUFFIX (where all parts are optional), i.e. a superset of
    what's handled now.  Most section matchers are of this form and hence
    don't need any calls to fnmatch anymore.
   =20
    We retain the implementation of match_simple_wild for the filename
    matchers (they aren't called often enough to matter).

Diff:
---
 ld/ld.h     |   3 +-
 ld/ldlang.c | 125 ++++++++++++++++++++++++++++++++++++++++++++------------=
----
 2 files changed, 95 insertions(+), 33 deletions(-)

diff --git a/ld/ld.h b/ld/ld.h
index 2ac9f469d04..2a95e14e3b8 100644
--- a/ld/ld.h
+++ b/ld/ld.h
@@ -98,8 +98,9 @@ struct wildcard_spec
 {
   const char *name;
   struct name_list *exclude_name_list;
-  sort_type sorted;
   struct flag_info *section_flag_list;
+  size_t namelen, prefixlen, suffixlen;
+  sort_type sorted;
 };
=20
 struct wildcard_list
diff --git a/ld/ldlang.c b/ld/ldlang.c
index 03daba6ef7f..5fc55dcd5dd 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -169,14 +169,75 @@ stat_alloc (size_t size)
   return obstack_alloc (&stat_obstack, size);
 }
=20
+/* Code for handling simple wildcards without going through fnmatch,
+   which can be expensive because of charset translations etc.  */
+
+/* A simple wild is a literal string followed by a single '*',
+   where the literal part is at least 4 characters long.  */
+
+static bool
+is_simple_wild (const char *name)
+{
+  size_t len =3D strcspn (name, "*?[");
+  return len >=3D 4 && name[len] =3D=3D '*' && name[len + 1] =3D=3D '\0';
+}
+
+static bool
+match_simple_wild (const char *pattern, const char *name)
+{
+  /* The first four characters of the pattern are guaranteed valid
+     non-wildcard characters.  So we can go faster.  */
+  if (pattern[0] !=3D name[0] || pattern[1] !=3D name[1]
+      || pattern[2] !=3D name[2] || pattern[3] !=3D name[3])
+    return false;
+
+  pattern +=3D 4;
+  name +=3D 4;
+  while (*pattern !=3D '*')
+    if (*name++ !=3D *pattern++)
+      return false;
+
+  return true;
+}
+
 static int
 name_match (const char *pattern, const char *name)
 {
+  if (is_simple_wild (pattern))
+    return !match_simple_wild (pattern, name);
   if (wildcardp (pattern))
     return fnmatch (pattern, name, 0);
   return strcmp (pattern, name);
 }
=20
+/* Given an analyzed wildcard_spec SPEC, match it against NAME,
+   returns zero on a match, non-zero if there's no match.  */
+
+static int
+spec_match (const struct wildcard_spec *spec, const char *name)
+{
+  size_t nl =3D spec->namelen;
+  size_t pl =3D spec->prefixlen;
+  size_t sl =3D spec->suffixlen;
+  int r;
+  if (pl && (r =3D memcmp (spec->name, name, pl)))
+    return r;
+  if (sl)
+    {
+      size_t inputlen =3D strlen (name);
+      if (inputlen < sl)
+	return 1;
+      r =3D memcmp (spec->name + nl - sl, name + inputlen - sl, sl);
+      if (r)
+	return r;
+    }
+  if (nl =3D=3D pl + sl + 1 && spec->name[pl] =3D=3D '*')
+    return 0;
+  else if (nl > pl)
+    return fnmatch (spec->name + pl, name + pl, 0);
+  return name[nl];
+}
+
 static char *
 ldirname (const char *name)
 {
@@ -349,7 +410,7 @@ walk_wild_section_general (lang_wild_statement_type *pt=
r,
 	    {
 	      const char *sname =3D bfd_section_name (s);
=20
-	      skip =3D name_match (sec->spec.name, sname) !=3D 0;
+	      skip =3D spec_match (&sec->spec, sname) !=3D 0;
 	    }
=20
 	  if (!skip)
@@ -397,37 +458,6 @@ find_section (lang_input_statement_type *file,
   return cb_data.found_section;
 }
=20
-/* Code for handling simple wildcards without going through fnmatch,
-   which can be expensive because of charset translations etc.  */
-
-/* A simple wild is a literal string followed by a single '*',
-   where the literal part is at least 4 characters long.  */
-
-static bool
-is_simple_wild (const char *name)
-{
-  size_t len =3D strcspn (name, "*?[");
-  return len >=3D 4 && name[len] =3D=3D '*' && name[len + 1] =3D=3D '\0';
-}
-
-static bool
-match_simple_wild (const char *pattern, const char *name)
-{
-  /* The first four characters of the pattern are guaranteed valid
-     non-wildcard characters.  So we can go faster.  */
-  if (pattern[0] !=3D name[0] || pattern[1] !=3D name[1]
-      || pattern[2] !=3D name[2] || pattern[3] !=3D name[3])
-    return false;
-
-  pattern +=3D 4;
-  name +=3D 4;
-  while (*pattern !=3D '*')
-    if (*name++ !=3D *pattern++)
-      return false;
-
-  return true;
-}
-
 /* Return the numerical value of the init_priority attribute from
    section name NAME.  */
=20
@@ -821,6 +851,24 @@ wild_spec_can_overlap (const char *name1, const char *=
name2)
   return memcmp (name1, name2, min_prefix_len) =3D=3D 0;
 }
=20
+/* Like strcspn() but start to look from the end to beginning of
+   S.  Returns the length of the suffix of S consisting entirely
+   of characters not in REJECT.  */
+
+static size_t
+rstrcspn (const char *s, const char *reject)
+{
+  size_t len =3D strlen (s), sufflen =3D 0;
+  while (len--)
+    {
+      char c =3D s[len];
+      if (strchr (reject, c) !=3D 0)
+	break;
+      sufflen++;
+    }
+  return sufflen;
+}
+
 /* Select specialized code to handle various kinds of wildcard
    statements.  */
=20
@@ -840,6 +888,19 @@ analyze_walk_wild_section_handler (lang_wild_statement=
_type *ptr)
   ptr->handler_data[3] =3D NULL;
   ptr->tree =3D NULL;
=20
+  for (sec =3D ptr->section_list; sec !=3D NULL; sec =3D sec->next)
+    {
+      if (sec->spec.name)
+	{
+	  sec->spec.namelen =3D strlen (sec->spec.name);
+	  sec->spec.prefixlen =3D strcspn (sec->spec.name, "?*[");
+	  sec->spec.suffixlen =3D rstrcspn (sec->spec.name + sec->spec.prefixlen,
+					  "?*]");
+	}
+      else
+	sec->spec.namelen =3D sec->spec.prefixlen =3D sec->spec.suffixlen =3D 0;
+    }
+
   /* Count how many wildcard_specs there are, and how many of those
      actually use wildcards in the name.  Also, bail out if any of the
      wildcard names are NULL. (Can this actually happen?