public inbox for gdb-patches@sourceware.org
 help / color / mirror / Atom feed
From: Max Yvon Zimmermann <max.yvon.zimmermann@campus.tu-berlin.de>
To: <gdb-patches@sourceware.org>
Subject: [PATCH v2] Add wildcard matching to substitute-path rules
Date: Thu, 4 Apr 2024 02:17:47 +0200	[thread overview]
Message-ID: <2c4b26c0-2541-44b4-9c37-a296f099a3cd@campus.tu-berlin.de> (raw)

Changes since v1:

The function strip_trailing_directory_separator_and_escape() is now fixed.

I have added a new function validate_substitute_path_pattern() that will validate any pattern during the registration of a new rule. If an invalid pattern is detected, an error message gets printed. I hope this will make the feature more usable. If this check were not performed, an invalid rule would still be visible with 'show substitute-path', but it would never match anything.

Use literal ',' and '\' in the documentation.

Change the example file names in the documentation.

More tests in the testsuite.

---
 gdb/NEWS                              |   5 +
 gdb/doc/gdb.texinfo                   |  29 ++++
 gdb/source.c                          | 106 +++++++++++----
 gdb/testsuite/gdb.base/subst-glob.exp | 182 ++++++++++++++++++++++++++
 gdb/utils.c                           | 100 ++++++++++++++
 gdb/utils.h                           |   2 +
 6 files changed, 401 insertions(+), 23 deletions(-)
 create mode 100644 gdb/testsuite/gdb.base/subst-glob.exp

diff --git a/gdb/NEWS b/gdb/NEWS
index feb3a37393a..5a041175507 100644
--- a/gdb/NEWS
+++ b/gdb/NEWS
@@ -90,6 +90,11 @@ show unwind-on-signal
   These new commands replaces the existing set/show unwindonsignal.  The
   old command is maintained as an alias.
 
+set substitute-path
+  This command now supports glob pattern matching for substitution
+  rules.  Wildcards '?' and '*' are supported.  Use '\' to escape
+  '?', '*' and '\' characters.
+
 * New features in the GDB remote stub, GDBserver
 
   ** The --remote-debug and --event-loop-debug command line options
diff --git a/gdb/doc/gdb.texinfo b/gdb/doc/gdb.texinfo
index 727f9275bfb..3bf7d99a153 100644
--- a/gdb/doc/gdb.texinfo
+++ b/gdb/doc/gdb.texinfo
@@ -9954,6 +9954,35 @@ For instance, if we had entered the following commands:
 use the second rule to rewrite @file{/usr/src/lib/foo.c} into
 @file{/mnt/src/lib/foo.c}.
 
+Rules can contain wildcards to match multiple paths.  The supported
+wildcards are @samp{?} (to match any single character) and @samp{*}
+(to match any string).  Wildcards will never match the path separator of
+the system.
+
+For instance, if we had entered the following command:
+
+@smallexample
+(@value{GDBP}) set substitute-path /build/*/include /mnt/include
+@end smallexample
+
+@noindent
+@value{GDBN} would then rewrite @file{/build/release/include/inc.h} into
+@file{/mnt/include/inc.h}.  Another file @file{/build/debug/include/inc.h}
+would also be rewritten as @file{/mnt/include/inc.h} using the same rule.
+
+Use @samp{\} to escape the characters @samp{?}, @samp{*} and @samp{\}.  Note
+that you need to escape any @samp{\} characters twice in the @value{GDBN}
+command line.
+
+So if we want to match a literal @samp{*} character in a rule, we would enter:
+
+@smallexample
+(@value{GDBP}) set substitute-path /foo\\*/bar /mnt/cross
+@end smallexample
+
+@noindent
+Now only the directory @file{/foo*/bar/} would match against the rule.
+
 
 @item unset substitute-path [path]
 @kindex unset substitute-path
diff --git a/gdb/source.c b/gdb/source.c
index 432301e2a71..9a2f47194af 100644
--- a/gdb/source.c
+++ b/gdb/source.c
@@ -16,6 +16,7 @@
    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
 
+#include "defs.h"
 #include "arch-utils.h"
 #include "symtab.h"
 #include "expression.h"
@@ -26,6 +27,7 @@
 #include "frame.h"
 #include "value.h"
 #include "gdbsupport/filestuff.h"
+#include "utils.h"
 
 #include <sys/types.h>
 #include <fcntl.h>
@@ -959,44 +961,69 @@ source_full_path_of (const char *filename,
   return 1;
 }
 
-/* Return non-zero if RULE matches PATH, that is if the rule can be
-   applied to PATH.  */
+/* Validate a substitute-path PATTERN.
+   Return 1 if PATTERN is valid.
+   Return 0 if PATTERN is invalid.  */
 
 static int
-substitute_path_rule_matches (const struct substitute_path_rule *rule,
-			      const char *path)
+validate_substitute_path_pattern (const char *pattern)
 {
-  const int from_len = rule->from.length ();
-  const int path_len = strlen (path);
+  /* Iterate backwards to find any unterminated escapes.  */
 
-  if (path_len < from_len)
-    return 0;
+  int backslash_count = 0;
+  for (int i = strlen (pattern) - 1; i >= 0; --i)
+    {
+      if (pattern[i] != '\\')
+        break;
+
+      ++backslash_count;
+    }
 
+  return (backslash_count % 2) == 0;
+}
+
+/* Return the length of the prefix of PATH that is matched by RULE, that is
+   the position in PATH up to which the rule can be applied.
+   Return -1 if there is no match.  */
+
+static int
+substitute_path_rule_matches (const struct substitute_path_rule *rule,
+			      const char *path)
+{
   /* The substitution rules are anchored at the start of the path,
      so the path should start with rule->from.  */
 
-  if (filename_ncmp (path, rule->from.c_str (), from_len) != 0)
-    return 0;
+  const int result = gdb_fileprefix_fnmatch (rule->from.c_str (), path);
 
-  /* Make sure that the region in the path that matches the substitution
-     rule is immediately followed by a directory separator (or the end of
-     string character).  */
+  if (result != -1)
+    {
+      /* Make sure that the region in the path that matches the substitution
+        rule is immediately followed by a directory separator (or the end of
+        string character).  */
 
-  if (path[from_len] != '\0' && !IS_DIR_SEPARATOR (path[from_len]))
-    return 0;
+      if (path[result] != '\0' && !IS_DIR_SEPARATOR (path[result]))
+        return -1;
+    }
 
-  return 1;
+  return result;
 }
 
 /* Find the substitute-path rule that applies to PATH and return it.
+   On success, set SUB_POS to the length of the PATH prefix the rule matches.
    Return NULL if no rule applies.  */
 
 static struct substitute_path_rule *
-get_substitute_path_rule (const char *path)
+get_substitute_path_rule (const char *path, int &sub_pos)
 {
   for (substitute_path_rule &rule : substitute_path_rules)
-    if (substitute_path_rule_matches (&rule, path))
-      return &rule;
+    {
+      const int result = substitute_path_rule_matches (&rule, path);
+      if (result != -1)
+        {
+          sub_pos = result;
+          return &rule;
+        }
+    }
 
   return nullptr;
 }
@@ -1010,7 +1037,9 @@ get_substitute_path_rule (const char *path)
 gdb::unique_xmalloc_ptr<char>
 rewrite_source_path (const char *path)
 {
-  const struct substitute_path_rule *rule = get_substitute_path_rule (path);
+  int sub_pos;
+  const struct substitute_path_rule *rule
+    = get_substitute_path_rule (path, sub_pos);
 
   if (rule == nullptr)
     return nullptr;
@@ -1018,7 +1047,7 @@ rewrite_source_path (const char *path)
   /* Compute the rewritten path and return it.  */
 
   return (gdb::unique_xmalloc_ptr<char>
-	  (concat (rule->to.c_str (), path + rule->from.length (), nullptr)));
+	  (concat (rule->to.c_str (), path + sub_pos, nullptr)));
 }
 
 /* See source.h.  */
@@ -1718,6 +1747,34 @@ strip_trailing_directory_separator (char *path)
     path[last] = '\0';
 }
 
+/* If the last character of PATH is a directory separator, then strip it.
+   Also remove any related escape character (on DOS-based systems).  */
+
+static void
+strip_trailing_directory_separator_and_escape (char *path)
+{
+  const int last = strlen (path) - 1;
+
+  if (last < 0)
+    return;  /* No stripping is needed if PATH is the empty string.  */
+
+  if (!IS_DIR_SEPARATOR (path[last]))
+    return;
+
+#ifdef HAVE_DOS_BASED_FILE_SYSTEM
+  if (path[last] == '\\')
+    {
+      if (last < 1 || path[last - 1] != '\\')
+        return;
+
+      /* Remove any related escape character.  */
+      path[last - 1] = '\0';
+    }
+#endif
+
+  path[last] = '\0';
+}
+
 /* Add a new substitute-path rule at the end of the current list of rules.
    The new rule will replace FROM into TO.  */
 
@@ -1754,7 +1811,7 @@ show_substitute_path_command (const char *args, int from_tty)
 
   for (substitute_path_rule &rule : substitute_path_rules)
     {
-      if (from == NULL || substitute_path_rule_matches (&rule, from) != 0)
+      if (from == NULL || substitute_path_rule_matches (&rule, from) != -1)
 	gdb_printf ("  `%s' -> `%s'.\n", rule.from.c_str (),
 		    rule.to.c_str ());
     }
@@ -1830,9 +1887,12 @@ set_substitute_path_command (const char *args, int from_tty)
 
   /* Strip any trailing directory separator character in either FROM
      or TO.  The substitution rule already implicitly contains them.  */
-  strip_trailing_directory_separator (argv[0]);
+  strip_trailing_directory_separator_and_escape (argv[0]);
   strip_trailing_directory_separator (argv[1]);
 
+  if (!validate_substitute_path_pattern (argv[0]))
+    error (_("First argument is not a valid glob expression"));
+
   /* If a rule with the same "from" was previously defined, then
      delete it.  This new rule replaces it.  */
 
diff --git a/gdb/testsuite/gdb.base/subst-glob.exp b/gdb/testsuite/gdb.base/subst-glob.exp
new file mode 100644
index 00000000000..b364760671a
--- /dev/null
+++ b/gdb/testsuite/gdb.base/subst-glob.exp
@@ -0,0 +1,182 @@
+# Copyright 2006-2024 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+clean_restart
+
+# Do a bunch of testing of the substitute-path glob pattern matching.
+
+gdb_test_no_output "set confirm off" \
+        "deactivate GDB's confirmation interface"
+
+proc test_pattern { pattern path mode } {
+    # Escape backslashes so the GDB console can unescape them again.
+    set terminal_pattern [ \
+        string map { \
+            "\\" "\\\\" \
+        } $pattern \
+    ]
+
+    # Escape the pattern for regex matching.
+    set match_pattern [ \
+        string map { \
+            "*"  "\\\\*" \
+            "?"  "\\\\?" \
+            "\\" "\\\\\\\\" \
+        } $pattern \
+    ]
+
+    # Handle stripping of path separators.
+    if {[ishost "*-mingw*"]} {
+        regsub {(\\\\\\\\\\\\|/)$} $match_pattern {} match_pattern
+        set match_pattern [subst $match_pattern]
+    } else {
+        regsub {/$} $match_pattern {} match_pattern
+        set match_pattern [subst $match_pattern]
+    }
+
+    # Escape backslashes so the GDB console can unescape them again.
+    set terminal_path [ \
+        string map { \
+            "\\" "\\\\" \
+        } $path \
+    ]
+    
+    # Escape the path for regex matching.
+    set match_path [ \
+        string map { \
+            "*"  "\\*" \
+            "?"  "\\?" \
+            "\\" "\\\\" \
+        } $path \
+    ]
+    
+    if {$mode == "fail"} {
+        gdb_test "set substitute-path \"$terminal_pattern\" \"to\"" \
+            "First argument is not a valid glob expression" \
+            "set substitute-path $terminal_pattern (expect failure)"
+    } else {
+        gdb_test_no_output "unset substitute-path" \
+            "unset substitute-path before testing '$terminal_pattern' matches '$terminal_path'"
+
+        gdb_test_no_output "set substitute-path \"$terminal_pattern\" \"to\"" \
+            "set substitute-path $terminal_pattern before testing '$terminal_pattern' matches '$terminal_path'"
+    }
+    
+    if {$mode == "pos"} {
+        gdb_test "show substitute-path \"$terminal_path\"" \
+            "Source path substitution rule matching `$match_path':\r\n +`$match_pattern' -> `to'." \
+            "testing '$terminal_pattern' matches '$terminal_path'"
+    } elseif {$mode == "neg"} {
+        gdb_test "show substitute-path \"$terminal_path\"" \
+            "Source path substitution rule matching `$match_path':" \
+            "testing '$terminal_pattern' does not match '$terminal_path'"
+    }
+}
+
+proc test_pattern_pos { pattern path } {
+    test_pattern $pattern $path "pos"
+}
+
+proc test_pattern_neg { pattern path } {
+    test_pattern $pattern $path "neg"
+}
+
+proc test_pattern_fail { pattern } {
+    test_pattern $pattern "" "fail"
+}
+
+# Sanity checks.
+test_pattern_pos "path" "path"
+test_pattern_pos "path" "path/to"
+test_pattern_pos "/" "/test"
+test_pattern_pos "/testing" "/testing/test"
+test_pattern_pos "/testing/" "/testing/test"
+test_pattern_neg "path" "test"
+test_pattern_neg "///" "test"
+test_pattern_neg "/path//to" "/path/to"
+
+# '?' wildcard.
+test_pattern_pos "?atchone" "matchone"
+test_pattern_pos "pat?/to" "path/to"
+test_pattern_pos "path??" "pathto"
+test_pattern_pos "test?ng" "testing"
+test_pattern_pos "?" "?/test"
+test_pattern_neg "test?" "test/"
+test_pattern_neg "test?" "testing/"
+test_pattern_neg "?" ""
+
+# '*' wildcard.
+test_pattern_pos "*" "matchall"
+test_pattern_pos "path_*" "path_pattern"
+test_pattern_pos "test*/test" "testing/test"
+test_pattern_pos "test*" "testing/test"
+test_pattern_pos "test*" "test/test"
+test_pattern_pos "*" "testing/test"
+test_pattern_pos "*/*" "testing/test"
+test_pattern_pos "*/" "test/"
+test_pattern_pos "/*" "/test"
+test_pattern_pos "test*" "test/"
+test_pattern_pos "test*" "test"
+test_pattern_pos "test*test" "testtest"
+test_pattern_pos "test*test" "testingtest"
+test_pattern_pos "test*test" "testingtest/test"
+test_pattern_pos "*" "*test"
+test_pattern_pos "**" "t"
+test_pattern_pos "*" ""
+test_pattern_pos "*t*st" "foobartest"
+test_pattern_pos "*t*st" "foobartest/ing"
+test_pattern_pos "*t*st" "tetest"
+test_pattern_pos "*t*st" "tetest/ing"
+test_pattern_pos "*t*st" "testtest"
+test_pattern_pos "*t*st" "testtest/ing"
+test_pattern_neg "*test" "foobar"
+test_pattern_neg "*/test" "foo/bar"
+
+# Escapes.
+test_pattern_pos "\\\\" "\\"
+test_pattern_pos "\\\\*" "\\test"
+test_pattern_pos "*\\\\" "test\\"
+test_pattern_pos "\\\\/" "\\/"
+test_pattern_pos "\\*" "*"
+test_pattern_pos "\\?" "?"
+test_pattern_pos "\\*" "*/test"
+test_pattern_pos "\\?" "?/test"
+test_pattern_pos "\\//" "/"
+test_pattern_neg "\\//" "test"
+test_pattern_neg "\\*" "*test"
+test_pattern_neg "\\?" "?test"
+test_pattern_neg "\\*" "t"
+test_pattern_neg "\\?" "t"
+test_pattern_fail "\\"
+test_pattern_fail "\\/"
+test_pattern_fail "\\\\\\"
+test_pattern_fail "test\\"
+test_pattern_fail "test\\\\\\"
+
+if {[ishost "*-mingw*"]} {
+    # DOS tests.
+    test_pattern_pos "test" "TEST"
+    test_pattern_pos "/" "\\test"
+    test_pattern_pos "\\\\" "/test"
+    test_pattern_pos "*\\\\" "test/"
+}
+
+if {[ishost "*-linux*"]} {
+    # Unix tests: names are case-sensitive and a backslash is no separator.
+    test_pattern_neg "test" "TEST"
+    test_pattern_neg "/" "\\test"
+    test_pattern_neg "\\\\" "/test"
+    test_pattern_neg "*\\\\" "test/"
+}
diff --git a/gdb/utils.c b/gdb/utils.c
index ded03c74099..00597543051 100644
--- a/gdb/utils.c
+++ b/gdb/utils.c
@@ -3532,6 +3532,106 @@ gdb_filename_fnmatch (const char *pattern, const char *string, int flags)
   return fnmatch (pattern, string, flags);
 }
 
+/* Match the glob PATTERN against the start of STRING.  Return the position
+   in STRING just past the matched prefix, or -1 if there is no match.
+
+   Only the wildcards ? and * are supported.  */
+
+int
+gdb_fileprefix_fnmatch (const char *pattern, const char *string)
+{
+  int string_pos = 0;
+  char pattern_c;
+  char string_c;
+
+  while (*pattern != '\0' && *string != '\0')
+    {
+      switch (*pattern)
+        {
+        /* Unescape and match the next character.  */
+        case '\\':
+          ++pattern;
+          if (*pattern == '\0')
+            return -1;
+          [[fallthrough]];
+
+        default:
+          pattern_c = *pattern;
+          string_c = *string;
+
+#ifdef HAVE_CASE_INSENSITIVE_FILE_SYSTEM
+          pattern_c = TOLOWER (pattern_c);
+          string_c = TOLOWER (string_c);
+#endif
+
+#ifdef HAVE_DOS_BASED_FILE_SYSTEM
+          /* On DOS-based file systems, the '/' and the '\' are equivalent.  */
+          if (pattern_c == '/')
+            pattern_c = '\\';
+          if (string_c == '/')
+            string_c = '\\';
+#endif
+
+          /* Compare the current character of the pattern with the path.  */
+          if (pattern_c != string_c)
+            return -1;
+          break;
+
+        /* Match any character.  */
+        case '?':
+          /* Directory separators are not matched by '?'.  */
+          if (IS_DIR_SEPARATOR (*string))
+            return -1;
+          break;
+
+        /* Match any string.  */
+        case '*':
+          int best_result = -1;
+
+          /* Try to match any following substring.  */
+          while (true)
+            {
+              /* Most of these attempts will fail at the first character. */
+              int result = gdb_fileprefix_fnmatch (pattern+1, string);
+
+              if (result != -1)
+                {
+                  /* If there is a substring match, compare its result to the best
+                    candidate so far.  */
+                  result += string_pos;
+                  if (result > best_result)
+                    best_result = result;
+                }
+
+              /* Exit on a null byte or a directory separator.  */
+              if (*string == '\0' || IS_DIR_SEPARATOR (*string))
+                return best_result;
+
+              ++string;
+              ++string_pos;
+            }
+        }
+    
+        ++pattern;
+        ++string;
+        ++string_pos;
+      }
+
+  /* If the matching is complete but there is still some of the pattern left,
+     we must ensure that the remaining pattern matches the empty string.  */
+  if (*pattern != '\0')
+    {
+      /* Only '*' can match the empty string.  */
+      while (*pattern == '*')
+        ++pattern;
+
+      if (*pattern != '\0')
+        return -1;
+    }
+
+  return string_pos;
+}
+
 /* Return the number of path elements in PATH.
    / = 1
    /foo = 2
diff --git a/gdb/utils.h b/gdb/utils.h
index 875a2583179..eaf3fe8a8c3 100644
--- a/gdb/utils.h
+++ b/gdb/utils.h
@@ -137,6 +137,8 @@ struct set_batch_flag_and_restore_page_info
 extern int gdb_filename_fnmatch (const char *pattern, const char *string,
 				 int flags);
 
+extern int gdb_fileprefix_fnmatch (const char *pattern, const char *string);
+
 extern void substitute_path_component (char **stringp, const char *from,
 				       const char *to);
 
-- 
2.34.1

             reply	other threads:[~2024-04-04  0:17 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-04  0:17 Max Yvon Zimmermann [this message]
2024-04-04  6:25 ` Eli Zaretskii

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2c4b26c0-2541-44b4-9c37-a296f099a3cd@campus.tu-berlin.de \
    --to=max.yvon.zimmermann@campus.tu-berlin.de \
    --cc=gdb-patches@sourceware.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).