From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <SRS0=X8go=3Z=suse.de=matz@sourceware.org>
Received: from smtp-out2.suse.de (smtp-out2.suse.de [IPv6:2001:67c:2178:6::1d])
	by sourceware.org (Postfix) with ESMTPS id 2E76F384EF50
	for <binutils@sourceware.org>; Fri, 25 Nov 2022 16:01:59 +0000 (GMT)
DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org 2E76F384EF50
Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=suse.de
Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=suse.de
Received: from relay2.suse.de (relay2.suse.de [149.44.160.134])
	by smtp-out2.suse.de (Postfix) with ESMTP id 69B9F1FD68
	for <binutils@sourceware.org>; Fri, 25 Nov 2022 16:01:58 +0000 (UTC)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=suse.de; s=susede2_rsa;
	t=1669392118; h=from:from:reply-to:date:date:message-id:message-id:to:to:cc:
	 mime-version:mime-version:content-type:content-type;
	bh=iyc9DfDD73zL4ERpeNgostZgEVM7eOp49LjKZ/ROgY4=;
	b=1tQseYrjHjtQSk1wmwSZD8mtuLdI5mtA6Fo3k9vufYOEYtkiQXY7gQDSTwtDGRm0cnix/N
	sZMI0dPx1TI8OR4CATkJloxPs4ZuJJbLHWtuvWBbLoIv0xhdnhyV+yk0cmYDVFIpcwJ47p
	EiNfD84TgpPEnLbulDtiPuFwW2sBgEc=
DKIM-Signature: v=1; a=ed25519-sha256; c=relaxed/relaxed; d=suse.de;
	s=susede2_ed25519; t=1669392118;
	h=from:from:reply-to:date:date:message-id:message-id:to:to:cc:
	 mime-version:mime-version:content-type:content-type;
	bh=iyc9DfDD73zL4ERpeNgostZgEVM7eOp49LjKZ/ROgY4=;
	b=/uf25seST4R5nyja7iXgNKkFyWPx7CHfGNS3ik3LE2q0iH8pS5pep0FGfimsFlK7any+gz
	EURnMb2CrptkOSAw==
Received: from wotan.suse.de (wotan.suse.de [10.160.0.1])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits))
	(No client certificate requested)
	by relay2.suse.de (Postfix) with ESMTPS id 630702C141
	for <binutils@sourceware.org>; Fri, 25 Nov 2022 16:01:58 +0000 (UTC)
Received: by wotan.suse.de (Postfix, from userid 10510)
	id 5767A6586; Fri, 25 Nov 2022 16:01:58 +0000 (UTC)
Received: from localhost (localhost [127.0.0.1])
	by wotan.suse.de (Postfix) with ESMTP id 5605D60ED
	for <binutils@sourceware.org>; Fri, 25 Nov 2022 16:01:58 +0000 (UTC)
Date: Fri, 25 Nov 2022 16:01:58 +0000 (UTC)
From: Michael Matz <matz@suse.de>
To: binutils@sourceware.org
Subject: [PATCH] Special case more simple patterns
Message-ID: <alpine.LSU.2.20.2211251600280.24878@wotan.suse.de>
User-Agent: Alpine 2.20 (LSU 67 2015-01-07)
MIME-Version: 1.0
Content-Type: text/plain; charset=US-ASCII
X-Spam-Status: No, score=-9.0 required=5.0 tests=BAYES_00,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,SPF_HELO_NONE,SPF_PASS,TXREP autolearn=ham autolearn_force=no version=3.4.6
X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org
List-Id: <binutils.sourceware.org>

fnmatch is slow, so avoiding it in more cases is good.  This implements
a more generic version of match_simple_wild which needs some
pre-processing of patterns.  In particular it supports patterns of the
form PREFIX*SUFFIX (where all parts are optional), i.e. a superset of
what's handled now.  Most section matchers are of this form and hence
don't need any calls to fnmatch anymore.

We retain the implementation of match_simple_wild for the filename
matchers (they aren't called often enough to matter).
---
This is independently useful, but will be used in the next-to-be-posted
series about section-select.

Regtested on Alan's target list.  Okay for master?


Ciao,
Michael.

 ld/ld.h     |   3 +-
 ld/ldlang.c | 120 ++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 90 insertions(+), 33 deletions(-)

diff --git a/ld/ld.h b/ld/ld.h
index 2ac9f469d04..2a95e14e3b8 100644
--- a/ld/ld.h
+++ b/ld/ld.h
@@ -98,8 +98,9 @@ struct wildcard_spec
 {
   const char *name;
   struct name_list *exclude_name_list;
-  sort_type sorted;
   struct flag_info *section_flag_list;
+  size_t namelen, prefixlen, suffixlen;
+  sort_type sorted;
 };
 
 struct wildcard_list
diff --git a/ld/ldlang.c b/ld/ldlang.c
index 03daba6ef7f..3274659aec4 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -169,14 +169,72 @@ stat_alloc (size_t size)
   return obstack_alloc (&stat_obstack, size);
 }
 
+/* Code for handling simple wildcards without going through fnmatch,
+   which can be expensive because of charset translations etc.  */
+
+/* A pattern counts as a simple wild when it is at least four literal
+   characters followed by exactly one '*' and nothing else.  */
+
+static bool
+is_simple_wild (const char *name)
+{
+  size_t literal_len = strcspn (name, "*?[");
+  return literal_len >= 4 && strcmp (name + literal_len, "*") == 0;
+}
+
+static bool
+match_simple_wild (const char *pattern, const char *name)
+{
+  /* PATTERN is expected to have passed is_simple_wild, so its first
+     four characters are literals and can be compared unconditionally.  */
+  for (int i = 0; i < 4; i++)
+    if (pattern[i] != name[i])
+      return false;
+
+  /* Compare the remaining literal part up to the trailing '*'; a NAME
+     that ends early fails here because its NUL differs from a literal.  */
+  for (pattern += 4, name += 4; *pattern != '*'; pattern++, name++)
+    if (*name != *pattern)
+      return false;
+
+  return true;
+}
+
 static int
 name_match (const char *pattern, const char *name)
 {
+  if (is_simple_wild (pattern))  /* Fast path that avoids fnmatch.  */
+    return !match_simple_wild (pattern, name);  /* 0 means match.  */
   if (wildcardp (pattern))
     return fnmatch (pattern, name, 0);
   return strcmp (pattern, name);
 }
 
+/* Match NAME against SPEC using its precomputed literal prefix/suffix
+   lengths; like strcmp/fnmatch, return 0 on a match, nonzero otherwise.  */
+static int
+spec_match (const struct wildcard_spec *spec, const char *name)
+{
+  size_t nl = spec->namelen;
+  size_t pl = spec->prefixlen;
+  size_t sl = spec->suffixlen;
+  size_t inputlen = strlen (name);
+  int r;
+  /* The literal prefix and suffix must both fit in NAME without
+     overlapping; this also keeps the memcmp calls inside NAME.  */
+  if (inputlen < pl + sl)
+    return 1;
+  if (pl && (r = memcmp (spec->name, name, pl)))
+    return r;
+  if (sl && (r = memcmp (spec->name + nl - sl, name + inputlen - sl, sl)))
+    return r;
+  if (nl == pl + sl + 1 && spec->name[pl] == '*')
+    return 0;
+  else if (nl > pl)
+    return fnmatch (spec->name + pl, name + pl, 0);
+  return name[nl];
+}
+
 static char *
 ldirname (const char *name)
 {
@@ -349,7 +407,7 @@ walk_wild_section_general (lang_wild_statement_type *ptr,
 	    {
 	      const char *sname = bfd_section_name (s);
 
-	      skip = name_match (sec->spec.name, sname) != 0;
+	      skip = spec_match (&sec->spec, sname) != 0;
 	    }
 
 	  if (!skip)
@@ -397,37 +455,6 @@ find_section (lang_input_statement_type *file,
   return cb_data.found_section;
 }
 
-/* Code for handling simple wildcards without going through fnmatch,
-   which can be expensive because of charset translations etc.  */
-
-/* A simple wild is a literal string followed by a single '*',
-   where the literal part is at least 4 characters long.  */
-
-static bool
-is_simple_wild (const char *name)
-{
-  size_t len = strcspn (name, "*?[");
-  return len >= 4 && name[len] == '*' && name[len + 1] == '\0';
-}
-
-static bool
-match_simple_wild (const char *pattern, const char *name)
-{
-  /* The first four characters of the pattern are guaranteed valid
-     non-wildcard characters.  So we can go faster.  */
-  if (pattern[0] != name[0] || pattern[1] != name[1]
-      || pattern[2] != name[2] || pattern[3] != name[3])
-    return false;
-
-  pattern += 4;
-  name += 4;
-  while (*pattern != '*')
-    if (*name++ != *pattern++)
-      return false;
-
-  return true;
-}
-
 /* Return the numerical value of the init_priority attribute from
    section name NAME.  */
 
@@ -645,6 +672,7 @@ walk_wild_section_specs1_wild1 (lang_wild_statement_type *ptr,
     {
       const char *sname = bfd_section_name (s);
       bool skip = !match_simple_wild (wildsec0->spec.name, sname);
+      //bool skip = !!spec_match (&wildsec0->spec, sname);
 
       if (!skip)
 	walk_wild_consider_section (ptr, file, s, wildsec0, callback, data);
@@ -682,6 +710,7 @@ walk_wild_section_specs2_wild1 (lang_wild_statement_type *ptr,
 	{
 	  const char *sname = bfd_section_name (s);
 	  bool skip = !match_simple_wild (wildsec1->spec.name, sname);
+	  //bool skip = !!spec_match (&wildsec1->spec, sname);
 
 	  if (!skip)
 	    walk_wild_consider_section (ptr, file, s, wildsec1, callback,
@@ -821,6 +850,20 @@ wild_spec_can_overlap (const char *name1, const char *name2)
   return memcmp (name1, name2, min_prefix_len) == 0;
 }
 
+/* Counterpart of strcspn working from the end: return the length of
+   the longest suffix of S containing no character from REJECT.  */
+static size_t
+rstrcspn (const char *s, const char *reject)
+{
+  size_t total = strlen (s);
+  size_t pos = total;
+
+  while (pos > 0 && strchr (reject, s[pos - 1]) == NULL)
+    pos--;
+
+  return total - pos;
+}
+
 /* Select specialized code to handle various kinds of wildcard
    statements.  */
 
@@ -840,6 +883,19 @@ analyze_walk_wild_section_handler (lang_wild_statement_type *ptr)
   ptr->handler_data[3] = NULL;
   ptr->tree = NULL;
 
+  for (sec = ptr->section_list; sec != NULL; sec = sec->next)
+    {
+      if (sec->spec.name)
+	{
+	  sec->spec.namelen = strlen (sec->spec.name);
+	  sec->spec.prefixlen = strcspn (sec->spec.name, "?*[");
+	  sec->spec.suffixlen = rstrcspn (sec->spec.name + sec->spec.prefixlen,
+					  "?*]");
+	}
+      else
+	sec->spec.namelen = sec->spec.prefixlen = sec->spec.suffixlen = 0;
+    }
+
   /* Count how many wildcard_specs there are, and how many of those
      actually use wildcards in the name.  Also, bail out if any of the
      wildcard names are NULL. (Can this actually happen?
-- 
2.36.1