From: Max Yvon Zimmermann <max.yvon.zimmermann@campus.tu-berlin.de>
To: <gdb-patches@sourceware.org>
Subject: [PATCH v2] Add wildcard matching to substitute-path rules
Date: Thu, 4 Apr 2024 02:17:47 +0200 [thread overview]
Message-ID: <2c4b26c0-2541-44b4-9c37-a296f099a3cd@campus.tu-berlin.de> (raw)
Changes since v1:
The function strip_trailing_directory_separator_and_escape() is now fixed.
I have added a new function validate_substitute_path_pattern() that validates the pattern whenever a new rule is registered. If an invalid pattern is detected, an error message is printed. I hope this will make the feature more usable. If this check were not performed, an invalid rule would still be visible with 'show substitute-path', but it would never match anything.
Use literal ',' and '\' in the documentation.
Change the example file names in the documentation.
More tests in the testsuite.
---
gdb/NEWS | 5 +
gdb/doc/gdb.texinfo | 29 ++++
gdb/source.c | 106 +++++++++++----
gdb/testsuite/gdb.base/subst-glob.exp | 182 ++++++++++++++++++++++++++
gdb/utils.c | 100 ++++++++++++++
gdb/utils.h | 2 +
6 files changed, 401 insertions(+), 23 deletions(-)
create mode 100644 gdb/testsuite/gdb.base/subst-glob.exp
diff --git a/gdb/NEWS b/gdb/NEWS
index feb3a37393a..5a041175507 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -90,6 +90,11 @@ show unwind-on-signal
These new commands replaces the existing set/show unwindonsignal. The
old command is maintained as an alias.
+set substitute-path
+ This command now supports glob pattern matching for substitution
+ rules. Wildcards '?' and '*' are supported. Use '\' to escape
+ '?', '*' and '\' characters.
+
* New features in the GDB remote stub, GDBserver
** The --remote-debug and --event-loop-debug command line options
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 727f9275bfb..3bf7d99a153 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -9954,6 +9954,35 @@ For instance, if we had entered the following commands:
use the second rule to rewrite @file{/usr/src/lib/foo.c} into
@file{/mnt/src/lib/foo.c}.
+Rules can contain wildcards to match multiple paths. The supported
+wildcards are @samp{?} (to match any single character) and @samp{*}
+(to match any string, including the empty one). Wildcards never match
+the path separator of the system.
+
+For instance, if we had entered the following command:
+
+@smallexample
+(@value{GDBP}) set substitute-path /build/*/include /mnt/include
+@end smallexample
+
+@noindent
+@value{GDBN} would then rewrite @file{/build/release/include/inc.h} into
+@file{/mnt/include/inc.h}. Another file @file{/build/debug/include/inc.h}
+would also be rewritten as @file{/mnt/include/inc.h} using the same rule.
+
+Use @samp{\} to escape the characters @samp{?}, @samp{*} and @samp{\}. Note
+that you need to escape any @samp{\} characters twice in the @value{GDBN}
+command line.
+
+So if we want to match a literal @samp{*} character in a rule, we would enter:
+
+@smallexample
+(@value{GDBP}) set substitute-path /foo\\*/bar /mnt/cross
+@end smallexample
+
+@noindent
+Now only the directory @file{/foo*/bar/} would match against the rule.
+
@item unset substitute-path [path]
@kindex unset substitute-path
diff --git a/gdb/source.c b/gdb/source.c
index 432301e2a71..9a2f47194af 100644
--- a/gdb/source.c
+++ b/gdb/source.c
@@ -16,6 +16,7 @@
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
+#include "defs.h"
#include "arch-utils.h"
#include "symtab.h"
#include "expression.h"
@@ -26,6 +27,7 @@
#include "frame.h"
#include "value.h"
#include "gdbsupport/filestuff.h"
+#include "utils.h"
#include <sys/types.h>
#include <fcntl.h>
@@ -959,44 +961,69 @@ source_full_path_of (const char *filename,
return 1;
}
-/* Return non-zero if RULE matches PATH, that is if the rule can be
- applied to PATH. */
+/* Check that the substitute-path PATTERN does not end in a dangling escape.
+ Return 1 if PATTERN ends in an even number of backslashes (valid).
+ Return 0 if it ends in an odd number, i.e. an unterminated escape. */
static int
-substitute_path_rule_matches (const struct substitute_path_rule *rule,
- const char *path)
+validate_substitute_path_pattern (const char *pattern)
{
- const int from_len = rule->from.length ();
- const int path_len = strlen (path);
+ /* Count trailing backslashes: an odd count leaves the last one dangling. */
- if (path_len < from_len)
- return 0;
+ int backslash_count = 0;
+ for (int i = strlen (pattern) - 1; i >= 0; --i)
+ {
+ if (pattern[i] != '\\')
+ break;
+
+ ++backslash_count;
+ }
+ return (backslash_count % 2) == 0;
+}
+
+/* Return the length of the leading part of PATH that RULE matches, that is
+ the offset in PATH at which the unmatched remainder begins.
+ Return -1 if the rule cannot be applied to PATH. */
+
+static int
+substitute_path_rule_matches (const struct substitute_path_rule *rule,
+ const char *path)
+{
/* The substitution rules are anchored at the start of the path,
so the path should start with rule->from. */
- if (filename_ncmp (path, rule->from.c_str (), from_len) != 0)
- return 0;
+ const int result = gdb_fileprefix_fnmatch (rule->from.c_str (), path);
- /* Make sure that the region in the path that matches the substitution
- rule is immediately followed by a directory separator (or the end of
- string character). */
+ if (result != -1)
+ {
+ /* Make sure that the region in the path that matches the substitution
+ rule is immediately followed by a directory separator (or the end of
+ string character). */
- if (path[from_len] != '\0' && !IS_DIR_SEPARATOR (path[from_len]))
- return 0;
+ if (path[result] != '\0' && !IS_DIR_SEPARATOR (path[result]))
+ return -1;
+ }
- return 1;
+ return result;
}
/* Find the substitute-path rule that applies to PATH and return it.
+ On a match, set SUB_POS to the offset in PATH just past the matched prefix.
Return NULL if no rule applies. */
static struct substitute_path_rule *
-get_substitute_path_rule (const char *path)
+get_substitute_path_rule (const char *path, int &sub_pos)
{
for (substitute_path_rule &rule : substitute_path_rules)
- if (substitute_path_rule_matches (&rule, path))
- return &rule;
+ {
+ const int result = substitute_path_rule_matches (&rule, path);
+ if (result != -1)
+ {
+ sub_pos = result;
+ return &rule;
+ }
+ }
return nullptr;
}
@@ -1010,7 +1037,9 @@ get_substitute_path_rule (const char *path)
gdb::unique_xmalloc_ptr<char>
rewrite_source_path (const char *path)
{
- const struct substitute_path_rule *rule = get_substitute_path_rule (path);
+ int sub_pos;
+ const struct substitute_path_rule *rule
+ = get_substitute_path_rule (path, sub_pos);
if (rule == nullptr)
return nullptr;
@@ -1018,7 +1047,7 @@ rewrite_source_path (const char *path)
/* Compute the rewritten path and return it. */
return (gdb::unique_xmalloc_ptr<char>
- (concat (rule->to.c_str (), path + rule->from.length (), nullptr)));
+ (concat (rule->to.c_str (), path + sub_pos, nullptr)));
}
/* See source.h. */
@@ -1718,6 +1747,34 @@ strip_trailing_directory_separator (char *path)
path[last] = '\0';
}
+/* Strip a trailing directory separator from PATH.  On DOS-based systems a
+ trailing backslash is stripped only if escaped; its escape is removed too. */
+
+static void
+strip_trailing_directory_separator_and_escape (char *path)
+{
+ const int last = strlen (path) - 1;
+
+ if (last < 0)
+ return; /* No stripping is needed if PATH is the empty string. */
+
+ if (!IS_DIR_SEPARATOR (path[last]))
+ return;
+
+#ifdef HAVE_DOS_BASED_FILE_SYSTEM
+ if (path[last] == '\\')
+ {
+ if (last < 1 || path[last - 1] != '\\')
+ return;
+
+ /* Drop the escaping backslash; the separator itself is cut below. */
+ path[last - 1] = '\0';
+ }
+#endif
+
+ path[last] = '\0';
+}
+
/* Add a new substitute-path rule at the end of the current list of rules.
The new rule will replace FROM into TO. */
@@ -1754,7 +1811,7 @@ show_substitute_path_command (const char *args, int from_tty)
for (substitute_path_rule &rule : substitute_path_rules)
{
- if (from == NULL || substitute_path_rule_matches (&rule, from) != 0)
+ if (from == NULL || substitute_path_rule_matches (&rule, from) != -1)
gdb_printf (" `%s' -> `%s'.\n", rule.from.c_str (),
rule.to.c_str ());
}
@@ -1830,9 +1887,12 @@ set_substitute_path_command (const char *args, int from_tty)
/* Strip any trailing directory separator character in either FROM
or TO. The substitution rule already implicitly contains them. */
- strip_trailing_directory_separator (argv[0]);
+ strip_trailing_directory_separator_and_escape (argv[0]);
strip_trailing_directory_separator (argv[1]);
+ if (!validate_substitute_path_pattern (argv[0]))
+ error (_("First argument is not a valid glob expression"));
+
/* If a rule with the same "from" was previously defined, then
delete it. This new rule replaces it. */
diff --git a/gdb/testsuite/gdb.base/subst-glob.exp b/gdb/testsuite/gdb.base/subst-glob.exp
new file mode 100644
index 00000000000..b364760671a
--- /dev/null
+++ b/gdb/testsuite/gdb.base/subst-glob.exp
@@ -0,0 +1,182 @@
+# Copyright 2006-2024 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+clean_restart
+
+# Do a bunch of testing of the substitute-path glob pattern matching.
+
+gdb_test_no_output "set confirm off" \
+ "deactivate GDB's confirmation interface"
+
+proc test_pattern { pattern path mode } {
+ # Escape backslashes so the GDB console can unescape them again.
+ set terminal_pattern [ \
+ string map { \
+ "\\" "\\\\" \
+ } $pattern \
+ ]
+
+ # Escape the pattern for regex matching.
+ set match_pattern [ \
+ string map { \
+ "*" "\\\\*" \
+ "?" "\\\\?" \
+ "\\" "\\\\\\\\" \
+ } $pattern \
+ ]
+
+ # GDB strips a trailing path separator from the rule; mirror that here.
+ if {[ishost "*-mingw*"]} {
+ regsub {(\\\\\\\\\\\\|/)$} $match_pattern {} match_pattern
+ set match_pattern [subst $match_pattern]
+ } else {
+ regsub {/$} $match_pattern {} match_pattern
+ set match_pattern [subst $match_pattern]
+ }
+
+ # Escape backslashes so the GDB console can unescape them again.
+ set terminal_path [ \
+ string map { \
+ "\\" "\\\\" \
+ } $path \
+ ]
+
+ # Escape the path for regex matching.
+ set match_path [ \
+ string map { \
+ "*" "\\*" \
+ "?" "\\?" \
+ "\\" "\\\\" \
+ } $path \
+ ]
+
+ if {$mode == "fail"} {
+ gdb_test "set substitute-path \"$terminal_pattern\" \"to\"" \
+ "First argument is not a valid glob expression" \
+ "set substitute-path $terminal_pattern (expect failure)"
+ } else {
+ gdb_test_no_output "unset substitute-path" \
+ "unset substitute-path before testing '$terminal_pattern' matches '$terminal_path'"
+
+ gdb_test_no_output "set substitute-path \"$terminal_pattern\" \"to\"" \
+ "set substitute-path $terminal_pattern before testing '$terminal_pattern' matches '$terminal_path'"
+ }
+
+ if {$mode == "pos"} {
+ gdb_test "show substitute-path \"$terminal_path\"" \
+ "Source path substitution rule matching `$match_path':\r\n +`$match_pattern' -> `to'." \
+ "testing '$terminal_pattern' matches '$terminal_path'"
+ } elseif {$mode == "neg"} {
+ gdb_test "show substitute-path \"$terminal_path\"" \
+ "Source path substitution rule matching `$match_path':" \
+ "testing '$terminal_pattern' does not match '$terminal_path'"
+ }
+}
+
+proc test_pattern_pos { pattern path } {
+ test_pattern $pattern $path "pos"
+}
+
+proc test_pattern_neg { pattern path } {
+ test_pattern $pattern $path "neg"
+}
+
+proc test_pattern_fail { pattern } {
+ test_pattern $pattern "" "fail"
+}
+
+# Sanity checks.
+test_pattern_pos "path" "path"
+test_pattern_pos "path" "path/to"
+test_pattern_pos "/" "/test"
+test_pattern_pos "/testing" "/testing/test"
+test_pattern_pos "/testing/" "/testing/test"
+test_pattern_neg "path" "test"
+test_pattern_neg "///" "test"
+test_pattern_neg "/path//to" "/path/to"
+
+# '?' wildcard.
+test_pattern_pos "?atchone" "matchone"
+test_pattern_pos "pat?/to" "path/to"
+test_pattern_pos "path??" "pathto"
+test_pattern_pos "test?ng" "testing"
+test_pattern_pos "?" "?/test"
+test_pattern_neg "test?" "test/"
+test_pattern_neg "test?" "testing/"
+test_pattern_neg "?" ""
+
+# '*' wildcard.
+test_pattern_pos "*" "matchall"
+test_pattern_pos "path_*" "path_pattern"
+test_pattern_pos "test*/test" "testing/test"
+test_pattern_pos "test*" "testing/test"
+test_pattern_pos "test*" "test/test"
+test_pattern_pos "*" "testing/test"
+test_pattern_pos "*/*" "testing/test"
+test_pattern_pos "*/" "test/"
+test_pattern_pos "/*" "/test"
+test_pattern_pos "test*" "test/"
+test_pattern_pos "test*" "test"
+test_pattern_pos "test*test" "testtest"
+test_pattern_pos "test*test" "testingtest"
+test_pattern_pos "test*test" "testingtest/test"
+test_pattern_pos "*" "*test"
+test_pattern_pos "**" "t"
+test_pattern_pos "*" ""
+test_pattern_pos "*t*st" "foobartest"
+test_pattern_pos "*t*st" "foobartest/ing"
+test_pattern_pos "*t*st" "tetest"
+test_pattern_pos "*t*st" "tetest/ing"
+test_pattern_pos "*t*st" "testtest"
+test_pattern_pos "*t*st" "testtest/ing"
+test_pattern_neg "*test" "foobar"
+test_pattern_neg "*/test" "foo/bar"
+
+# Escapes.
+test_pattern_pos "\\\\" "\\"
+test_pattern_pos "\\\\*" "\\test"
+test_pattern_pos "*\\\\" "test\\"
+test_pattern_pos "\\\\/" "\\/"
+test_pattern_pos "\\*" "*"
+test_pattern_pos "\\?" "?"
+test_pattern_pos "\\*" "*/test"
+test_pattern_pos "\\?" "?/test"
+test_pattern_pos "\\//" "/"
+test_pattern_neg "\\//" "test"
+test_pattern_neg "\\*" "*test"
+test_pattern_neg "\\?" "?test"
+test_pattern_neg "\\*" "t"
+test_pattern_neg "\\?" "t"
+test_pattern_fail "\\"
+test_pattern_fail "\\/"
+test_pattern_fail "\\\\\\"
+test_pattern_fail "test\\"
+test_pattern_fail "test\\\\\\"
+
+if {[ishost "*-mingw*"]} {
+ # DOS tests.
+ test_pattern_pos "test" "TEST"
+ test_pattern_pos "/" "\\test"
+ test_pattern_pos "\\\\" "/test"
+ test_pattern_pos "*\\\\" "test/"
+}
+
+if {[ishost "*-linux*"]} {
+ # Unix tests: case matters and a backslash is not a path separator.
+ test_pattern_neg "test" "TEST"
+ test_pattern_neg "/" "\\test"
+ test_pattern_neg "\\\\" "/test"
+ test_pattern_neg "*\\\\" "test/"
+}
diff --git a/gdb/utils.c b/gdb/utils.c
index ded03c74099..00597543051 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -3532,6 +3532,106 @@ gdb_filename_fnmatch (const char *pattern, const char *string, int flags)
return fnmatch (pattern, string, flags);
}
+/* Match PATTERN against the start of STRING.  Return the length of the
+ matched prefix of STRING, or -1 if there is no match.
+ Only the wildcards ? and * are supported; neither matches a directory
+ separator.  A backslash escapes the following pattern character. */
+
+int
+gdb_fileprefix_fnmatch (const char *pattern, const char *string)
+{
+ int string_pos = 0;
+ char pattern_c;
+ char string_c;
+
+ while (*pattern != '\0' && *string != '\0')
+ {
+ switch (*pattern)
+ {
+ /* Unescape and match the next character. */
+ case '\\':
+ ++pattern;
+ if (*pattern == '\0')
+ return -1;
+ [[fallthrough]];
+
+ default:
+ pattern_c = *pattern;
+ string_c = *string;
+
+#ifdef HAVE_CASE_INSENSITIVE_FILE_SYSTEM
+ pattern_c = TOLOWER (pattern_c);
+ string_c = TOLOWER (string_c);
+#endif
+
+#ifdef HAVE_DOS_BASED_FILE_SYSTEM
+ /* On DOS-based file systems, the '/' and the '\' are equivalent. */
+ if (pattern_c == '/')
+ pattern_c = '\\';
+ if (string_c == '/')
+ string_c = '\\';
+#endif
+
+ /* Compare the current character of the pattern with the path. */
+ if (pattern_c != string_c)
+ return -1;
+ break;
+
+ /* Match any single character. */
+ case '?':
+ /* Directory separators are not matched by '?'. */
+ if (IS_DIR_SEPARATOR (*string))
+ return -1;
+ break;
+
+ /* Match the longest separator-free string that lets the rest match. */
+ case '*':
+ int best_result = -1;
+
+ /* Try to match any following substring. */
+ while (true)
+ {
+ /* Most of these attempts will fail at the first character. */
+ int result = gdb_fileprefix_fnmatch (pattern+1, string);
+
+ if (result != -1)
+ {
+ /* If there is a substring match, compare its result to the best
+ candidate so far. */
+ result += string_pos;
+ if (result > best_result)
+ best_result = result;
+ }
+
+ /* Exit on a null byte or a directory separator. */
+ if (*string == '\0' || IS_DIR_SEPARATOR (*string))
+ return best_result;
+
+ ++string;
+ ++string_pos;
+ }
+ }
+
+ ++pattern;
+ ++string;
+ ++string_pos;
+ }
+
+ /* If the string is exhausted but there is still some of the pattern left,
+ we must ensure that the remaining pattern matches the empty string. */
+ if (*pattern != '\0')
+ {
+ /* Only '*' can match the empty string. */
+ while (*pattern == '*')
+ ++pattern;
+
+ if (*pattern != '\0')
+ return -1;
+ }
+
+ return string_pos;
+}
+
/* Return the number of path elements in PATH.
/ = 1
/foo = 2
diff --git a/gdb/utils.h b/gdb/utils.h
index 875a2583179..eaf3fe8a8c3 100644
--- a/gdb/utils.h
+++ b/gdb/utils.h
@@ -137,6 +137,8 @@ struct set_batch_flag_and_restore_page_info
extern int gdb_filename_fnmatch (const char *pattern, const char *string,
int flags);
+extern int gdb_fileprefix_fnmatch (const char *pattern, const char *string);
+
extern void substitute_path_component (char **stringp, const char *from,
const char *to);
--
2.34.1
next reply other threads:[~2024-04-04 0:17 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-04 0:17 Max Yvon Zimmermann [this message]
2024-04-04 6:25 ` Eli Zaretskii
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=2c4b26c0-2541-44b4-9c37-a296f099a3cd@campus.tu-berlin.de \
--to=max.yvon.zimmermann@campus.tu-berlin.de \
--cc=gdb-patches@sourceware.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).