public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Support the new ("v0") mangling scheme in rust-demangle.
@ 2020-03-13 20:28 Eduard-Mihai Burtescu
  2020-03-23  1:34 ` Eduard-Mihai Burtescu
  2020-04-06 21:52 ` Eduard-Mihai Burtescu
  0 siblings, 2 replies; 20+ messages in thread
From: Eduard-Mihai Burtescu @ 2020-03-13 20:28 UTC (permalink / raw)
  To: gcc-patches; +Cc: ian, iant

[-- Attachment #1: Type: text/plain, Size: 1072 bytes --]

This is the libiberty (mainly for binutils/gdb) counterpart of
https://github.com/alexcrichton/rustc-demangle/pull/23.

Relevant links for the new Rust mangling scheme (aka "v0"):
* Rust RFC: https://github.com/rust-lang/rfcs/pull/2603
* tracking issue: https://github.com/rust-lang/rust/issues/60705
* implementation: https://github.com/rust-lang/rust/pull/57967

This implementation includes full support for UTF-8 identifiers
via punycode, so I've included a testcase for that as well.
(Let me know if it causes any issues and I'll take it out)

Last year I've submitted several small patches to rust-demangle
in preparation for upstreaming the entire new demangler, and
feedback from that has proven useful.
For example, I started with error-handling macros, but instead
the code now has "rdm->errored = 1;" before several returns/gotos.

The patch is attached instead of inline, as it's over 1000 lines long.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Also, I have no commit access, so I'd be thankful if
someone would commit this for me if/once approved.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-Support-the-new-v0-mangling-scheme-in-rust-demangle.patch --]
[-- Type: text/x-patch; name="0001-Support-the-new-v0-mangling-scheme-in-rust-demangle.patch", Size: 28349 bytes --]

From 689442d88b87453a697d2121e45a55071cb9056f Mon Sep 17 00:00:00 2001
From: Eduard-Mihai Burtescu <eddyb@lyken.rs>
Date: Fri, 13 Mar 2020 21:07:28 +0200
Subject: [PATCH] Support the new ("v0") mangling scheme in rust-demangle.

This is the libiberty (mainly for binutils/gdb) counterpart of
https://github.com/alexcrichton/rustc-demangle/pull/23.

Relevant links for the new Rust mangling scheme (aka "v0"):
* Rust RFC: https://github.com/rust-lang/rfcs/pull/2603
* tracking issue: https://github.com/rust-lang/rust/issues/60705
* implementation: https://github.com/rust-lang/rust/pull/57967

This implementation includes full support for UTF-8 identifiers
via punycode, so I've included a testcase for that as well.

2020-03-13  Eduard-Mihai Burtescu  <eddyb@lyken.rs>
libiberty/ChangeLog:
	* rust-demangle.c (struct rust_demangler): Add
	skipping_printing and bound_lifetime_depth fields.
	(eat): Add (v0-only).
	(parse_integer_62): Add (v0-only).
	(parse_opt_integer_62): Add (v0-only).
	(parse_disambiguator): Add (v0-only).
	(struct rust_mangled_ident): Add punycode{,_len} fields.
	(parse_ident): Support v0 identifiers.
	(print_str): Respect skipping_printing.
	(print_uint64): Add (v0-only).
	(print_uint64_hex): Add (v0-only).
	(print_ident): Respect skipping_printing,
	Support v0 identifiers.
	(print_lifetime_from_index): Add (v0-only).
	(demangle_binder): Add (v0-only).
	(demangle_path): Add (v0-only).
	(demangle_generic_arg): Add (v0-only).
	(demangle_type): Add (v0-only).
	(demangle_path_maybe_open_generics): Add (v0-only).
	(demangle_dyn_trait): Add (v0-only).
	(demangle_const): Add (v0-only).
	(demangle_const_uint): Add (v0-only).
	(basic_type): Add (v0-only).
	(rust_demangle_callback): Support v0 symbols.
	* testsuite/rust-demangle-expected: Add v0 testcases.

diff --git a/libiberty/rust-demangle.c b/libiberty/rust-demangle.c
index b87365c85fe9..9a31392458e0 100644
--- a/libiberty/rust-demangle.c
+++ b/libiberty/rust-demangle.c
@@ -1,6 +1,7 @@
 /* Demangler for the Rust programming language
    Copyright (C) 2016-2020 Free Software Foundation, Inc.
    Written by David Tolnay (dtolnay@gmail.com).
+   Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
 
 This file is part of the libiberty library.
 Libiberty is free software; you can redistribute it and/or
@@ -64,11 +65,16 @@ struct rust_demangler
   /* Non-zero if any error occurred. */
   int errored;
 
+  /* Non-zero if nothing should be printed. */
+  int skipping_printing;
+
   /* Non-zero if printing should be verbose (e.g. include hashes). */
   int verbose;
 
   /* Rust mangling version, with legacy mangling being -1. */
   int version;
+
+  uint64_t bound_lifetime_depth;
 };
 
 /* Parsing functions. */
@@ -81,6 +87,18 @@ peek (const struct rust_demangler *rdm)
   return 0;
 }
 
+static int
+eat (struct rust_demangler *rdm, char c)
+{
+  if (peek (rdm) == c)
+    {
+      rdm->next++;
+      return 1;
+    }
+  else
+    return 0;
+}
+
 static char
 next (struct rust_demangler *rdm)
 {
@@ -92,11 +110,58 @@ next (struct rust_demangler *rdm)
   return c;
 }
 
+static uint64_t
+parse_integer_62 (struct rust_demangler *rdm)
+{
+  char c;
+  uint64_t x;
+
+  if (eat (rdm, '_'))
+    return 0;
+
+  x = 0;
+  while (!eat (rdm, '_'))
+    {
+      c = next (rdm);
+      x *= 62;
+      if (ISDIGIT (c))
+        x += c - '0';
+      else if (ISLOWER (c))
+        x += 10 + (c - 'a');
+      else if (ISUPPER (c))
+        x += 10 + 26 + (c - 'A');
+      else
+        {
+          rdm->errored = 1;
+          return 0;
+        }
+    }
+  return x + 1;
+}
+
+static uint64_t
+parse_opt_integer_62 (struct rust_demangler *rdm, char tag)
+{
+  if (!eat (rdm, tag))
+    return 0;
+  return 1 + parse_integer_62 (rdm);
+}
+
+static uint64_t
+parse_disambiguator (struct rust_demangler *rdm)
+{
+  return parse_opt_integer_62 (rdm, 's');
+}
+
 struct rust_mangled_ident
 {
   /* ASCII part of the identifier. */
   const char *ascii;
   size_t ascii_len;
+
+  /* Punycode insertion codes for Unicode codepoints, if any. */
+  const char *punycode;
+  size_t punycode_len;
 };
 
 static struct rust_mangled_ident
@@ -104,10 +169,16 @@ parse_ident (struct rust_demangler *rdm)
 {
   char c;
   size_t start, len;
+  int is_punycode = 0;
   struct rust_mangled_ident ident;
 
   ident.ascii = NULL;
   ident.ascii_len = 0;
+  ident.punycode = NULL;
+  ident.punycode_len = 0;
+
+  if (rdm->version != -1)
+    is_punycode = eat (rdm, 'u');
 
   c = next (rdm);
   if (!ISDIGIT (c))
@@ -121,6 +192,10 @@ parse_ident (struct rust_demangler *rdm)
     while (ISDIGIT (peek (rdm)))
       len = len * 10 + (next (rdm) - '0');
 
+  /* Skip past the optional `_` separator (v0). */
+  if (rdm->version != -1)
+    eat (rdm, '_');
+
   start = rdm->next;
   rdm->next += len;
   /* Check for overflows. */
@@ -133,6 +208,27 @@ parse_ident (struct rust_demangler *rdm)
   ident.ascii = rdm->sym + start;
   ident.ascii_len = len;
 
+  if (is_punycode)
+    {
+      ident.punycode_len = 0;
+      while (ident.ascii_len > 0)
+        {
+          ident.ascii_len--;
+
+          /* The last '_' is a separator between ascii & punycode. */
+          if (ident.ascii[ident.ascii_len] == '_')
+            break;
+
+          ident.punycode_len++;
+        }
+      if (!ident.punycode_len)
+        {
+          rdm->errored = 1;
+          return ident;
+        }
+      ident.punycode = ident.ascii + (len - ident.punycode_len);
+    }
+
   if (ident.ascii_len == 0)
     ident.ascii = NULL;
 
@@ -144,12 +240,28 @@ parse_ident (struct rust_demangler *rdm)
 static void
 print_str (struct rust_demangler *rdm, const char *data, size_t len)
 {
-  if (!rdm->errored)
+  if (!rdm->errored && !rdm->skipping_printing)
     rdm->callback (data, len, rdm->callback_opaque);
 }
 
 #define PRINT(s) print_str (rdm, s, strlen (s))
 
+static void
+print_uint64 (struct rust_demangler *rdm, uint64_t x)
+{
+  char s[21];
+  sprintf (s, "%" PRIu64, x);
+  PRINT (s);
+}
+
+static void
+print_uint64_hex (struct rust_demangler *rdm, uint64_t x)
+{
+  char s[17];
+  sprintf (s, "%" PRIx64, x);
+  PRINT (s);
+}
+
 /* Return a 0x0-0xf value if the char is 0-9a-f, and -1 otherwise. */
 static int
 decode_lower_hex_nibble (char nibble)
@@ -230,9 +342,14 @@ static void
 print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
 {
   char unescaped;
-  size_t len;
-
-  if (rdm->errored)
+  uint8_t *out, *p, d;
+  size_t len, cap, punycode_pos, j;
+  /* Punycode parameters and state. */
+  uint32_t c;
+  size_t base, t_min, t_max, skew, damp, bias, i;
+  size_t delta, w, k, t;
+
+  if (rdm->errored || rdm->skipping_printing)
     return;
 
   if (rdm->version == -1)
@@ -294,6 +411,758 @@ print_ident (struct rust_demangler *rdm, struct rust_mangled_ident ident)
 
       return;
     }
+
+  if (!ident.punycode)
+    {
+      print_str (rdm, ident.ascii, ident.ascii_len);
+      return;
+    }
+
+  len = 0;
+  cap = 4;
+  while (cap < ident.ascii_len)
+    {
+      cap *= 2;
+      /* Check for overflows. */
+      if ((cap * 4) / 4 != cap)
+        {
+          rdm->errored = 1;
+          return;
+        }
+    }
+
+  /* Store the output codepoints as groups of 4 UTF-8 bytes. */
+  out = (uint8_t *)malloc (cap * 4);
+  if (!out)
+    {
+      rdm->errored = 1;
+      return;
+    }
+
+  /* Populate initial output from ASCII fragment. */
+  for (len = 0; len < ident.ascii_len; len++)
+    {
+      p = out + 4 * len;
+      p[0] = 0;
+      p[1] = 0;
+      p[2] = 0;
+      p[3] = ident.ascii[len];
+    }
+
+  /* Punycode parameters and initial state. */
+  base = 36;
+  t_min = 1;
+  t_max = 26;
+  skew = 38;
+  damp = 700;
+  bias = 72;
+  i = 0;
+  c = 0x80;
+
+  punycode_pos = 0;
+  while (punycode_pos < ident.punycode_len)
+    {
+      /* Read one delta value. */
+      delta = 0;
+      w = 1;
+      k = 0;
+      do
+        {
+          k += base;
+          t = k < bias ? 0 : (k - bias);
+          if (t < t_min)
+            t = t_min;
+          if (t > t_max)
+            t = t_max;
+
+          if (punycode_pos >= ident.punycode_len)
+            goto cleanup;
+          d = ident.punycode[punycode_pos++];
+
+          if (ISLOWER (d))
+            d = d - 'a';
+          else if (ISDIGIT (d))
+            d = 26 + (d - '0');
+          else
+            {
+              rdm->errored = 1;
+              goto cleanup;
+            }
+
+          delta += d * w;
+          w *= base - t;
+        }
+      while (d >= t);
+
+      /* Compute the new insert position and character. */
+      len++;
+      i += delta;
+      c += i / len;
+      i %= len;
+
+      /* Ensure enough space is available. */
+      if (cap < len)
+        {
+          cap *= 2;
+          /* Check for overflows. */
+          if ((cap * 4) / 4 != cap || cap < len)
+            {
+              rdm->errored = 1;
+              goto cleanup;
+            }
+        }
+      p = (uint8_t *)realloc (out, cap * 4);
+      if (!p)
+        {
+          rdm->errored = 1;
+          goto cleanup;
+        }
+      out = p;
+
+      /* Move the characters after the insert position. */
+      p = out + i * 4;
+      memmove (p + 4, p, (len - i - 1) * 4);
+
+      /* Insert the new character, as UTF-8 bytes. */
+      p[0] = c >= 0x10000 ? 0xf0 | (c >> 18) : 0;
+      p[1] = c >= 0x800 ? (c < 0x10000 ? 0xe0 : 0x80) | ((c >> 12) & 0x3f) : 0;
+      p[2] = (c < 0x800 ? 0xc0 : 0x80) | ((c >> 6) & 0x3f);
+      p[3] = 0x80 | (c & 0x3f);
+
+      /* If there are no more deltas, decoding is complete. */
+      if (punycode_pos == ident.punycode_len)
+        break;
+
+      i++;
+
+      /* Perform bias adaptation. */
+      delta /= damp;
+      damp = 2;
+
+      delta += delta / len;
+      k = 0;
+      while (delta > ((base - t_min) * t_max) / 2)
+        {
+          delta /= base - t_min;
+          k += base;
+        }
+      bias = k + ((base - t_min + 1) * delta) / (delta + skew);
+    }
+
+  /* Remove all the 0 bytes to leave behind an UTF-8 string. */
+  for (i = 0, j = 0; i < len * 4; i++)
+    if (out[i] != 0)
+      out[j++] = out[i];
+
+  print_str (rdm, (const char *)out, j);
+
+cleanup:
+  free (out);
+}
+
+/* Print the lifetime according to the previously decoded index.
+   An index of `0` always refers to `'_`, but starting with `1`,
+   indices refer to late-bound lifetimes introduced by a binder. */
+static void
+print_lifetime_from_index (struct rust_demangler *rdm, uint64_t lt)
+{
+  char c;
+  uint64_t depth;
+
+  PRINT ("'");
+  if (lt == 0)
+    {
+      PRINT ("_");
+      return;
+    }
+
+  depth = rdm->bound_lifetime_depth - lt;
+  /* Try to print lifetimes alphabetically first. */
+  if (depth < 26)
+    {
+      c = 'a' + depth;
+      print_str (rdm, &c, 1);
+    }
+  else
+    {
+      /* Use `'_123` after running out of letters. */
+      PRINT ("_");
+      print_uint64 (rdm, depth);
+    }
+}
+
+/* Demangling functions. */
+
+static void demangle_binder (struct rust_demangler *rdm);
+static void demangle_path (struct rust_demangler *rdm, int in_value);
+static void demangle_generic_arg (struct rust_demangler *rdm);
+static void demangle_type (struct rust_demangler *rdm);
+static int demangle_path_maybe_open_generics (struct rust_demangler *rdm);
+static void demangle_dyn_trait (struct rust_demangler *rdm);
+static void demangle_const (struct rust_demangler *rdm);
+static void demangle_const_uint (struct rust_demangler *rdm);
+
+/* Optionally enter a binder ('G') for late-bound lifetimes,
+   printing e.g. `for<'a, 'b> `, and make those lifetimes visible
+   to the caller (via depth level, which the caller should reset). */
+static void
+demangle_binder (struct rust_demangler *rdm)
+{
+  uint64_t i, bound_lifetimes;
+
+  if (rdm->errored)
+    return;
+
+  bound_lifetimes = parse_opt_integer_62 (rdm, 'G');
+  if (bound_lifetimes > 0)
+    {
+      PRINT ("for<");
+      for (i = 0; i < bound_lifetimes; i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          rdm->bound_lifetime_depth++;
+          print_lifetime_from_index (rdm, 1);
+        }
+      PRINT ("> ");
+    }
+}
+
+static void
+demangle_path (struct rust_demangler *rdm, int in_value)
+{
+  char tag, ns;
+  int was_skipping_printing;
+  size_t i, backref, old_next;
+  uint64_t dis;
+  struct rust_mangled_ident name;
+
+  if (rdm->errored)
+    return;
+
+  switch (tag = next (rdm))
+    {
+    case 'C':
+      dis = parse_disambiguator (rdm);
+      name = parse_ident (rdm);
+
+      print_ident (rdm, name);
+      if (rdm->verbose)
+        {
+          PRINT ("[");
+          print_uint64_hex (rdm, dis);
+          PRINT ("]");
+        }
+      break;
+    case 'N':
+      ns = next (rdm);
+      if (!ISLOWER (ns) && !ISUPPER (ns))
+        {
+          rdm->errored = 1;
+          return;
+        }
+
+      demangle_path (rdm, in_value);
+
+      dis = parse_disambiguator (rdm);
+      name = parse_ident (rdm);
+
+      if (ISUPPER (ns))
+        {
+          /* Special namespaces, like closures and shims. */
+          PRINT ("::{");
+          switch (ns)
+            {
+            case 'C':
+              PRINT ("closure");
+              break;
+            case 'S':
+              PRINT ("shim");
+              break;
+            default:
+              print_str (rdm, &ns, 1);
+            }
+          if (name.ascii || name.punycode)
+            {
+              PRINT (":");
+              print_ident (rdm, name);
+            }
+          PRINT ("#");
+          print_uint64 (rdm, dis);
+          PRINT ("}");
+        }
+      else
+        {
+          /* Implementation-specific/unspecified namespaces. */
+
+          if (name.ascii || name.punycode)
+            {
+              PRINT ("::");
+              print_ident (rdm, name);
+            }
+        }
+      break;
+    case 'M':
+    case 'X':
+      /* Ignore the `impl`'s own path.*/
+      parse_disambiguator (rdm);
+      was_skipping_printing = rdm->skipping_printing;
+      rdm->skipping_printing = 1;
+      demangle_path (rdm, in_value);
+      rdm->skipping_printing = was_skipping_printing;
+      /* fallthrough */
+    case 'Y':
+      PRINT ("<");
+      demangle_type (rdm);
+      if (tag != 'M')
+        {
+          PRINT (" as ");
+          demangle_path (rdm, 0);
+        }
+      PRINT (">");
+      break;
+    case 'I':
+      demangle_path (rdm, in_value);
+      if (in_value)
+        PRINT ("::");
+      PRINT ("<");
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_generic_arg (rdm);
+        }
+      PRINT (">");
+      break;
+    case 'B':
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          demangle_path (rdm, in_value);
+          rdm->next = old_next;
+        }
+      break;
+    default:
+      rdm->errored = 1;
+      return;
+    }
+}
+
+static void
+demangle_generic_arg (struct rust_demangler *rdm)
+{
+  uint64_t lt;
+  if (eat (rdm, 'L'))
+    {
+      lt = parse_integer_62 (rdm);
+      print_lifetime_from_index (rdm, lt);
+    }
+  else if (eat (rdm, 'K'))
+    demangle_const (rdm);
+  else
+    demangle_type (rdm);
+}
+
+static const char *
+basic_type (char tag)
+{
+  switch (tag)
+    {
+    case 'b':
+      return "bool";
+    case 'c':
+      return "char";
+    case 'e':
+      return "str";
+    case 'u':
+      return "()";
+    case 'a':
+      return "i8";
+    case 's':
+      return "i16";
+    case 'l':
+      return "i32";
+    case 'x':
+      return "i64";
+    case 'n':
+      return "i128";
+    case 'i':
+      return "isize";
+    case 'h':
+      return "u8";
+    case 't':
+      return "u16";
+    case 'm':
+      return "u32";
+    case 'y':
+      return "u64";
+    case 'o':
+      return "u128";
+    case 'j':
+      return "usize";
+    case 'f':
+      return "f32";
+    case 'd':
+      return "f64";
+    case 'z':
+      return "!";
+    case 'p':
+      return "_";
+    case 'v':
+      return "...";
+
+    default:
+      return NULL;
+    }
+}
+
+static void
+demangle_type (struct rust_demangler *rdm)
+{
+  char tag;
+  size_t i, old_next, backref;
+  uint64_t lt, old_bound_lifetime_depth;
+  const char *basic;
+  struct rust_mangled_ident abi;
+
+  if (rdm->errored)
+    return;
+
+  tag = next (rdm);
+
+  basic = basic_type (tag);
+  if (basic)
+    {
+      PRINT (basic);
+      return;
+    }
+
+  switch (tag)
+    {
+    case 'R':
+    case 'Q':
+      PRINT ("&");
+      if (eat (rdm, 'L'))
+        {
+          lt = parse_integer_62 (rdm);
+          if (lt)
+            {
+              print_lifetime_from_index (rdm, lt);
+              PRINT (" ");
+            }
+        }
+      if (tag != 'R')
+        PRINT ("mut ");
+      demangle_type (rdm);
+      break;
+    case 'P':
+    case 'O':
+      PRINT ("*");
+      if (tag != 'P')
+        PRINT ("mut ");
+      else
+        PRINT ("const ");
+      demangle_type (rdm);
+      break;
+    case 'A':
+    case 'S':
+      PRINT ("[");
+      demangle_type (rdm);
+      if (tag == 'A')
+        {
+          PRINT ("; ");
+          demangle_const (rdm);
+        }
+      PRINT ("]");
+      break;
+    case 'T':
+      PRINT ("(");
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_type (rdm);
+        }
+      if (i == 1)
+        PRINT (",");
+      PRINT (")");
+      break;
+    case 'F':
+      old_bound_lifetime_depth = rdm->bound_lifetime_depth;
+      demangle_binder (rdm);
+
+      if (eat (rdm, 'U'))
+        PRINT ("unsafe ");
+
+      if (eat (rdm, 'K'))
+        {
+          if (eat (rdm, 'C'))
+            {
+              abi.ascii = "C";
+              abi.ascii_len = 1;
+            }
+          else
+            {
+              abi = parse_ident (rdm);
+              if (!abi.ascii || abi.punycode)
+                {
+                  rdm->errored = 1;
+                  goto restore;
+                }
+            }
+
+          PRINT ("extern \"");
+
+          /* If the ABI had any `-`, they were replaced with `_`,
+             so the parts between `_` have to be re-joined with `-`. */
+          for (i = 0; i < abi.ascii_len; i++)
+            {
+              if (abi.ascii[i] == '_')
+                {
+                  print_str (rdm, abi.ascii, i);
+                  PRINT ("-");
+                  abi.ascii += i + 1;
+                  abi.ascii_len -= i + 1;
+                  i = 0;
+                }
+            }
+          print_str (rdm, abi.ascii, abi.ascii_len);
+
+          PRINT ("\" ");
+        }
+
+      PRINT ("fn(");
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_type (rdm);
+        }
+      PRINT (")");
+
+      if (eat (rdm, 'u'))
+        {
+          /* Skip printing the return type if it's 'u', i.e. `()`. */
+        }
+      else
+        {
+          PRINT (" -> ");
+          demangle_type (rdm);
+        }
+
+    /* Restore `bound_lifetime_depth` to outside the binder. */
+    restore:
+      rdm->bound_lifetime_depth = old_bound_lifetime_depth;
+      break;
+    case 'D':
+      PRINT ("dyn ");
+
+      old_bound_lifetime_depth = rdm->bound_lifetime_depth;
+      demangle_binder (rdm);
+
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (" + ");
+          demangle_dyn_trait (rdm);
+        }
+
+      /* Restore `bound_lifetime_depth` to outside the binder. */
+      rdm->bound_lifetime_depth = old_bound_lifetime_depth;
+
+      if (!eat (rdm, 'L'))
+        {
+          rdm->errored = 1;
+          return;
+        }
+      lt = parse_integer_62 (rdm);
+      if (lt)
+        {
+          PRINT (" + ");
+          print_lifetime_from_index (rdm, lt);
+        }
+      break;
+    case 'B':
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          demangle_type (rdm);
+          rdm->next = old_next;
+        }
+      break;
+    default:
+      /* Go back to the tag, so `demangle_path` also sees it. */
+      rdm->next--;
+      demangle_path (rdm, 0);
+    }
+}
+
+/* A trait in a trait object may have some "existential projections"
+   (i.e. associated type bindings) after it, which should be printed
+   in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`.
+   To this end, this method will keep the `<...>` of an 'I' path
+   open, by omitting the `>`, and return `Ok(true)` in that case. */
+static int
+demangle_path_maybe_open_generics (struct rust_demangler *rdm)
+{
+  int open;
+  size_t i, old_next, backref;
+
+  open = 0;
+
+  if (rdm->errored)
+    return open;
+
+  if (eat (rdm, 'B'))
+    {
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          open = demangle_path_maybe_open_generics (rdm);
+          rdm->next = old_next;
+        }
+    }
+  else if (eat (rdm, 'I'))
+    {
+      demangle_path (rdm, 0);
+      PRINT ("<");
+      open = 1;
+      for (i = 0; !rdm->errored && !eat (rdm, 'E'); i++)
+        {
+          if (i > 0)
+            PRINT (", ");
+          demangle_generic_arg (rdm);
+        }
+    }
+  else
+    demangle_path (rdm, 0);
+  return open;
+}
+
+static void
+demangle_dyn_trait (struct rust_demangler *rdm)
+{
+  int open;
+  struct rust_mangled_ident name;
+
+  if (rdm->errored)
+    return;
+
+  open = demangle_path_maybe_open_generics (rdm);
+
+  while (eat (rdm, 'p'))
+    {
+      if (!open)
+        PRINT ("<");
+      else
+        PRINT (", ");
+      open = 1;
+
+      name = parse_ident (rdm);
+      print_ident (rdm, name);
+      PRINT (" = ");
+      demangle_type (rdm);
+    }
+
+  if (open)
+    PRINT (">");
+}
+
+static void
+demangle_const (struct rust_demangler *rdm)
+{
+  char ty_tag;
+  size_t old_next, backref;
+
+  if (rdm->errored)
+    return;
+
+  if (eat (rdm, 'B'))
+    {
+      backref = parse_integer_62 (rdm);
+      if (!rdm->skipping_printing)
+        {
+          old_next = rdm->next;
+          rdm->next = backref;
+          demangle_const (rdm);
+          rdm->next = old_next;
+        }
+      return;
+    }
+
+  ty_tag = next (rdm);
+  switch (ty_tag)
+    {
+    /* Unsigned integer types. */
+    case 'h':
+    case 't':
+    case 'm':
+    case 'y':
+    case 'o':
+    case 'j':
+      break;
+
+    default:
+      rdm->errored = 1;
+      return;
+    }
+
+  if (eat (rdm, 'p'))
+    PRINT ("_");
+  else
+    demangle_const_uint (rdm);
+
+  if (rdm->verbose)
+    {
+      PRINT (": ");
+      PRINT (basic_type (ty_tag));
+    }
+}
+
+static void
+demangle_const_uint (struct rust_demangler *rdm)
+{
+  char c;
+  size_t hex_len;
+  uint64_t value;
+
+  if (rdm->errored)
+    return;
+
+  value = 0;
+  hex_len = 0;
+  while (!eat (rdm, '_'))
+    {
+      value <<= 4;
+
+      c = next (rdm);
+      if (ISDIGIT (c))
+        value |= c - '0';
+      else if (c >= 'a' && c <= 'f')
+        value |= 10 + (c - 'a');
+      else
+        {
+          rdm->errored = 1;
+          return;
+        }
+      hex_len++;
+    }
+
+  /* Print anything that doesn't fit in `uint64_t` verbatim. */
+  if (hex_len > 16)
+    {
+      PRINT ("0x");
+      print_str (rdm, rdm->sym + (rdm->next - hex_len), hex_len);
+      return;
+    }
+
+  print_uint64 (rdm, value);
 }
 
 /* A legacy hash is the prefix "h" followed by 16 lowercase hex digits.
@@ -345,11 +1214,15 @@ rust_demangle_callback (const char *mangled, int options,
 
   rdm.next = 0;
   rdm.errored = 0;
+  rdm.skipping_printing = 0;
   rdm.verbose = (options & DMGL_VERBOSE) != 0;
   rdm.version = 0;
+  rdm.bound_lifetime_depth = 0;
 
-  /* Rust symbols always start with _ZN (legacy). */
-  if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
+  /* Rust symbols always start with _R (v0) or _ZN (legacy). */
+  if (rdm.sym[0] == '_' && rdm.sym[1] == 'R')
+    rdm.sym += 2;
+  else if (rdm.sym[0] == '_' && rdm.sym[1] == 'Z' && rdm.sym[2] == 'N')
     {
       rdm.sym += 3;
       rdm.version = -1;
@@ -357,7 +1230,11 @@ rust_demangle_callback (const char *mangled, int options,
   else
     return 0;
 
-  /* Legacy Rust symbols use only [_0-9a-zA-Z.:$] characters. */
+  /* Paths (v0) always start with uppercase characters. */
+  if (rdm.version != -1 && !ISUPPER (rdm.sym[0]))
+    return 0;
+
+  /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
   for (p = rdm.sym; *p; p++)
     {
       rdm.sym_len++;
@@ -365,6 +1242,7 @@ rust_demangle_callback (const char *mangled, int options,
       if (*p == '_' || ISALNUM (*p))
         continue;
 
+      /* Legacy Rust symbols can also contain [.:$] characters. */
       if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
         continue;
 
@@ -418,7 +1296,19 @@ rust_demangle_callback (const char *mangled, int options,
       while (rdm.next < rdm.sym_len);
     }
   else
-    return 0;
+    {
+      demangle_path (&rdm, 1);
+
+      /* Skip instantiating crate. */
+      if (!rdm.errored && rdm.next < rdm.sym_len)
+        {
+          rdm.skipping_printing = 1;
+          demangle_path (&rdm, 0);
+        }
+
+      /* It's an error to not reach the end. */
+      rdm.errored |= rdm.next != rdm.sym_len;
+    }
 
   return !rdm.errored;
 }
diff --git a/libiberty/testsuite/rust-demangle-expected b/libiberty/testsuite/rust-demangle-expected
index 74774794736a..6a6f4b141c09 100644
--- a/libiberty/testsuite/rust-demangle-expected
+++ b/libiberty/testsuite/rust-demangle-expected
@@ -11,7 +11,7 @@
 #
 ############
 #
-# Coverage Tests
+# Coverage Tests (legacy)
 #
 #
 # Demangles as rust symbol.
@@ -163,3 +163,60 @@ _ZN63_$LT$core..ptr..Unique$LT$T$GT$$u20$as$u20$core..ops..Deref$GT$5deref17h19f
 --format=rust
 _ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h059a991a004536adE
 issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo
+#
+############
+#
+# Coverage Tests (v0)
+#
+#
+# Crate with a leading digit.
+--format=rust
+_RNvC6_123foo3bar
+123foo::bar
+# UTF-8 identifiers.
+--format=rust
+_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y
+utf8_idents::საჭმელად_გემრიელი_სადილი
+# Closure path elements.
+--format=rust
+_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_
+cc::spawn::{closure#0}::{closure#0}
+#
+--format=rust
+_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_
+<core::slice::Iter<u8> as core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}
+# dyn Trait ("trait object") types.
+--format=rust
+_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std
+alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>
+# Type with a const generic parameter.
+--format=rust
+_RNvMC0INtC8arrayvec8ArrayVechKj7b_E3new
+<arrayvec::ArrayVec<u8, 123>>::new
+#
+# All of the tests above but in auto mode instead:
+#
+# Crate with a leading digit.
+--format=auto
+_RNvC6_123foo3bar
+123foo::bar
+# UTF-8 identifiers.
+--format=auto
+_RNqCs4fqI2P2rA04_11utf8_identsu30____7hkackfecea1cbdathfdh9hlq6y
+utf8_idents::საჭმელად_გემრიელი_სადილი
+# Closure path elements.
+--format=auto
+_RNCNCNgCs6DXkGYLi8lr_2cc5spawn00B5_
+cc::spawn::{closure#0}::{closure#0}
+#
+--format=auto
+_RNCINkXs25_NgCsbmNqQUJIY6D_4core5sliceINyB9_4IterhENuNgNoBb_4iter8iterator8Iterator9rpositionNCNgNpB9_6memchr7memrchrs_0E0Bb_
+<core::slice::Iter<u8> as core::iter::iterator::Iterator>::rposition::<core::slice::memchr::memrchr::{closure#1}>::{closure#0}
+# dyn Trait ("trait object") types.
+--format=auto
+_RINbNbCskIICzLVDPPb_5alloc5alloc8box_freeDINbNiB4_5boxed5FnBoxuEp6OutputuEL_ECs1iopQbuBiw2_3std
+alloc::alloc::box_free::<dyn alloc::boxed::FnBox<(), Output = ()>>
+# Type with a const generic parameter.
+--format=auto
+_RNvMC0INtC8arrayvec8ArrayVechKj7b_E3new
+<arrayvec::ArrayVec<u8, 123>>::new
-- 
2.24.1


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2021-04-21 11:24 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-03-13 20:28 [PATCH] Support the new ("v0") mangling scheme in rust-demangle Eduard-Mihai Burtescu
2020-03-23  1:34 ` Eduard-Mihai Burtescu
2020-10-28 21:22   ` Nikhil Benesch
2020-10-28 21:25     ` Nikhil Benesch
2020-10-28 21:47       ` Eduard-Mihai Burtescu
2020-10-28 22:21         ` Nikhil Benesch
2020-10-28 22:45           ` Ian Lance Taylor
2020-10-28 23:52             ` Eduard-Mihai Burtescu
2020-10-29  4:16               ` Nikhil Benesch
2020-11-01  9:18                 ` Nikhil Benesch
2020-11-01 11:57                   ` Eduard-Mihai Burtescu
2020-11-01 17:26                     ` Nikhil Benesch
2020-11-06 17:09                       ` Jeff Law
2020-11-13  6:42                         ` Nikhil Benesch
2020-11-13 13:34                           ` Eduard-Mihai Burtescu
2021-04-20 15:55                           ` Andreas Schwab
2021-04-21 11:24                             ` Jakub Jelinek
2020-04-06 21:52 ` Eduard-Mihai Burtescu
2020-04-13  2:52   ` Eduard-Mihai Burtescu
2020-04-23 18:52     ` Eduard-Mihai Burtescu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).