* [PATCH] Special case more simple patterns
@ 2022-11-25 16:01 Michael Matz
2022-11-28 2:12 ` Alan Modra
0 siblings, 1 reply; 2+ messages in thread
From: Michael Matz @ 2022-11-25 16:01 UTC (permalink / raw)
To: binutils
fnmatch is slow, so avoiding it in more cases is good. This implements
a more generic version of match_simple_wild which needs some
pre-processing of patterns. In particular it supports patterns of the
form PREFIX*SUFFIX (where all parts are optional), i.e. a superset of
what's handled now. Most section matchers are of this form and hence
don't need any calls to fnmatch anymore.
We retain the implementation of match_simple_wild for the filename
matchers (they aren't called often enough to matter).
---
This is independently useful, but will be used in the next-to-be-posted
series about section-select.
Regtested on Alan's target list. Okay for master?
Ciao,
Michael.
ld/ld.h | 3 +-
ld/ldlang.c | 120 ++++++++++++++++++++++++++++++++++++++--------------
2 files changed, 90 insertions(+), 33 deletions(-)
diff --git a/ld/ld.h b/ld/ld.h
index 2ac9f469d04..2a95e14e3b8 100644
--- a/ld/ld.h
+++ b/ld/ld.h
@@ -98,8 +98,9 @@ struct wildcard_spec
{
const char *name;
struct name_list *exclude_name_list;
- sort_type sorted;
struct flag_info *section_flag_list;
+ size_t namelen, prefixlen, suffixlen;
+ sort_type sorted;
};
struct wildcard_list
diff --git a/ld/ldlang.c b/ld/ldlang.c
index 03daba6ef7f..3274659aec4 100644
--- a/ld/ldlang.c
+++ b/ld/ldlang.c
@@ -169,14 +169,72 @@ stat_alloc (size_t size)
return obstack_alloc (&stat_obstack, size);
}
+/* Code for handling simple wildcards without going through fnmatch,
+ which can be expensive because of charset translations etc. */
+
+/* A simple wild is a literal string followed by a single '*',
+ where the literal part is at least 4 characters long. */
+
+static bool
+is_simple_wild (const char *name)
+{
+ size_t len = strcspn (name, "*?[");
+ return len >= 4 && name[len] == '*' && name[len + 1] == '\0';
+}
+
+static bool
+match_simple_wild (const char *pattern, const char *name)
+{
+ /* The first four characters of the pattern are guaranteed valid
+ non-wildcard characters. So we can go faster. */
+ if (pattern[0] != name[0] || pattern[1] != name[1]
+ || pattern[2] != name[2] || pattern[3] != name[3])
+ return false;
+
+ pattern += 4;
+ name += 4;
+ while (*pattern != '*')
+ if (*name++ != *pattern++)
+ return false;
+
+ return true;
+}
+
static int
name_match (const char *pattern, const char *name)
{
+ if (is_simple_wild (pattern))
+ return !match_simple_wild (pattern, name);
if (wildcardp (pattern))
return fnmatch (pattern, name, 0);
return strcmp (pattern, name);
}
+static int
+spec_match (const struct wildcard_spec *spec, const char *name)
+{
+ size_t nl = spec->namelen;
+ size_t pl = spec->prefixlen;
+ size_t sl = spec->suffixlen;
+ int r;
+ if (pl && (r = memcmp (spec->name, name, pl)))
+ return r;
+ if (sl)
+ {
+ size_t inputlen = strlen (name);
+ if (inputlen < sl)
+ return 1;
+ r = memcmp (spec->name + nl - sl, name + inputlen - sl, sl);
+ if (r)
+ return r;
+ }
+ if (nl == pl + sl + 1 && spec->name[pl] == '*')
+ return 0;
+ else if (nl > pl)
+ return fnmatch (spec->name + pl, name + pl, 0);
+ return name[nl];
+}
+
static char *
ldirname (const char *name)
{
@@ -349,7 +407,7 @@ walk_wild_section_general (lang_wild_statement_type *ptr,
{
const char *sname = bfd_section_name (s);
- skip = name_match (sec->spec.name, sname) != 0;
+ skip = spec_match (&sec->spec, sname) != 0;
}
if (!skip)
@@ -397,37 +455,6 @@ find_section (lang_input_statement_type *file,
return cb_data.found_section;
}
-/* Code for handling simple wildcards without going through fnmatch,
- which can be expensive because of charset translations etc. */
-
-/* A simple wild is a literal string followed by a single '*',
- where the literal part is at least 4 characters long. */
-
-static bool
-is_simple_wild (const char *name)
-{
- size_t len = strcspn (name, "*?[");
- return len >= 4 && name[len] == '*' && name[len + 1] == '\0';
-}
-
-static bool
-match_simple_wild (const char *pattern, const char *name)
-{
- /* The first four characters of the pattern are guaranteed valid
- non-wildcard characters. So we can go faster. */
- if (pattern[0] != name[0] || pattern[1] != name[1]
- || pattern[2] != name[2] || pattern[3] != name[3])
- return false;
-
- pattern += 4;
- name += 4;
- while (*pattern != '*')
- if (*name++ != *pattern++)
- return false;
-
- return true;
-}
-
/* Return the numerical value of the init_priority attribute from
section name NAME. */
@@ -645,6 +672,7 @@ walk_wild_section_specs1_wild1 (lang_wild_statement_type *ptr,
{
const char *sname = bfd_section_name (s);
bool skip = !match_simple_wild (wildsec0->spec.name, sname);
+ //bool skip = !!spec_match (&wildsec0->spec, sname);
if (!skip)
walk_wild_consider_section (ptr, file, s, wildsec0, callback, data);
@@ -682,6 +710,7 @@ walk_wild_section_specs2_wild1 (lang_wild_statement_type *ptr,
{
const char *sname = bfd_section_name (s);
bool skip = !match_simple_wild (wildsec1->spec.name, sname);
+ //bool skip = !!spec_match (&wildsec1->spec, sname);
if (!skip)
walk_wild_consider_section (ptr, file, s, wildsec1, callback,
@@ -821,6 +850,20 @@ wild_spec_can_overlap (const char *name1, const char *name2)
return memcmp (name1, name2, min_prefix_len) == 0;
}
+static size_t
+rstrcspn (const char *s, const char *reject)
+{
+ size_t len = strlen (s), sufflen = 0;
+ while (len--)
+ {
+ char c = s[len];
+ if (strchr (reject, c) != 0)
+ break;
+ sufflen++;
+ }
+ return sufflen;
+}
+
/* Select specialized code to handle various kinds of wildcard
statements. */
@@ -840,6 +883,19 @@ analyze_walk_wild_section_handler (lang_wild_statement_type *ptr)
ptr->handler_data[3] = NULL;
ptr->tree = NULL;
+ for (sec = ptr->section_list; sec != NULL; sec = sec->next)
+ {
+ if (sec->spec.name)
+ {
+ sec->spec.namelen = strlen (sec->spec.name);
+ sec->spec.prefixlen = strcspn (sec->spec.name, "?*[");
+ sec->spec.suffixlen = rstrcspn (sec->spec.name + sec->spec.prefixlen,
+ "?*]");
+ }
+ else
+ sec->spec.namelen = sec->spec.prefixlen = sec->spec.suffixlen = 0;
+ }
+
/* Count how many wildcard_specs there are, and how many of those
actually use wildcards in the name. Also, bail out if any of the
wildcard names are NULL. (Can this actually happen?
--
2.36.1
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH] Special case more simple patterns
2022-11-25 16:01 [PATCH] Special case more simple patterns Michael Matz
@ 2022-11-28 2:12 ` Alan Modra
0 siblings, 0 replies; 2+ messages in thread
From: Alan Modra @ 2022-11-28 2:12 UTC (permalink / raw)
To: Michael Matz; +Cc: binutils
On Fri, Nov 25, 2022 at 04:01:58PM +0000, Michael Matz via Binutils wrote:
> fnmatch is slow, so avoiding it in more cases is good. This implements
> a more generic version of match_simple_wild which needs some
> pre-processing of patterns. In particular it supports patterns of the
> form PREFIX*SUFFIX (where all parts are optional), i.e. a superset of
> what's handled now. Most section matchers are of this form and hence
> don't need any calls to fnmatch anymore.
>
> We retain the implementation of match_simple_wild for the filename
> matchers (they aren't called often enough to matter).
> ---
> This is independently useful, but will be used in the next-to-be-posted
> series about section-select.
OK, but please do put the comment for rstrcspn from your later patch
in with this one, and don't commit the following two hunks.
> @@ -645,6 +672,7 @@ walk_wild_section_specs1_wild1 (lang_wild_statement_type *ptr,
> {
> const char *sname = bfd_section_name (s);
> bool skip = !match_simple_wild (wildsec0->spec.name, sname);
> + //bool skip = !!spec_match (&wildsec0->spec, sname);
>
> if (!skip)
> walk_wild_consider_section (ptr, file, s, wildsec0, callback, data);
> @@ -682,6 +710,7 @@ walk_wild_section_specs2_wild1 (lang_wild_statement_type *ptr,
> {
> const char *sname = bfd_section_name (s);
> bool skip = !match_simple_wild (wildsec1->spec.name, sname);
> + //bool skip = !!spec_match (&wildsec1->spec, sname);
>
> if (!skip)
> walk_wild_consider_section (ptr, file, s, wildsec1, callback,
--
Alan Modra
Australia Development Lab, IBM
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2022-11-28 2:12 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-25 16:01 [PATCH] Special case more simple patterns Michael Matz
2022-11-28 2:12 ` Alan Modra
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).