public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-7933] gccrs: Add punycode encoding to v0 mangling
@ 2024-01-16 18:08 Arthur Cohen
0 siblings, 0 replies; only message in thread
From: Arthur Cohen @ 2024-01-16 18:08 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:10da25cd81461c9e18a2b4e6c44a83a74e6c0e2d
commit r14-7933-g10da25cd81461c9e18a2b4e6c44a83a74e6c0e2d
Author: Raiki Tamura <tamaron1203@gmail.com>
Date: Tue Aug 8 02:08:38 2023 +0900
gccrs: Add punycode encoding to v0 mangling
gcc/rust/ChangeLog:
* backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding
(v0_mangle_item): Likewise.
* lex/rust-lex.cc (assert_source_content): Make static; take expected by const reference.
(test_buffer_input_source): Likewise.
(test_file_input_source): Likewise.
* resolve/rust-ast-resolve-toplevel.h: fix typo
* rust-session-manager.cc (Session::load_extern_crate): fix typo
* util/rust-canonical-path.h: fix typo
* util/rust-hir-map.cc (NodeMapping::get_error): fix typo
(Mappings::Mappings): fix typo
* util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
(UNKNOWN_CRATENUM): Change 0 to UINT32_MAX
Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>
Diff:
---
gcc/rust/backend/rust-mangle.cc | 53 ++++++++++++++++++++--------
gcc/rust/lex/rust-lex.cc | 14 ++++----
gcc/rust/resolve/rust-ast-resolve-toplevel.h | 2 +-
gcc/rust/rust-session-manager.cc | 2 +-
gcc/rust/util/rust-canonical-path.h | 6 ++--
gcc/rust/util/rust-hir-map.cc | 4 +--
gcc/rust/util/rust-mapping-common.h | 2 +-
7 files changed, 54 insertions(+), 29 deletions(-)
diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc
index 62530d65382..248d69b72a7 100644
--- a/gcc/rust/backend/rust-mangle.cc
+++ b/gcc/rust/backend/rust-mangle.cc
@@ -1,8 +1,11 @@
#include "rust-mangle.h"
#include "fnv-hash.h"
+#include "optional.h"
#include "rust-base62.h"
#include "rust-unicode.h"
-#include "optional.h"
+#include "rust-diagnostics.h"
+#include "rust-unicode.h"
+#include "rust-punycode.h"
// FIXME: Rename those to legacy_*
static const std::string kMangledSymbolPrefix = "_ZN";
@@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
static void
v0_add_identifier (std::string &mangled, const std::string &identifier)
{
- // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
- // create mangling for unicode values for now. However, this is handled
- // by the v0 mangling scheme. The grammar for unicode identifier is
- // contained in <undisambiguated-identifier>, right under the <identifier>
- // one. If the identifier contains unicode values, then an extra "u" needs
- // to be added to the mangling string and `punycode` must be used to encode
- // the characters.
-
- mangled += std::to_string (identifier.size ());
-
+ // The grammar for unicode identifier is contained in
+ // <undisambiguated-identifier>, right under the <identifier> one. If the
+ // identifier contains unicode values, then an extra "u" needs to be added to
+ // the mangling string and `punycode` must be used to encode the characters.
+ tl::optional<Utf8String> uident_opt
+ = Utf8String::make_utf8_string (identifier);
+ rust_assert (uident_opt.has_value ());
+ tl::optional<std::string> punycode_opt
+ = encode_punycode (uident_opt.value ());
+ rust_assert (punycode_opt.has_value ());
+
+ bool is_ascii_ident = true;
+ for (auto c : uident_opt.value ().get_chars ())
+ if (c.value > 127)
+ {
+ is_ascii_ident = false;
+ break;
+ }
+
+ std::string punycode = punycode_opt.value ();
+ // remove trailing hyphen
+ if (punycode.back () == '-')
+ punycode.pop_back ();
+ // replace hyphens in punycode with underscores
+ std::replace (punycode.begin (), punycode.end (), '-', '_');
+
+ if (!is_ascii_ident)
+ mangled.append ("u");
+
+ mangled += std::to_string (punycode.size ());
// If the first character of the identifier is a digit or an underscore, we
// add an extra underscore
- if (identifier[0] == '_')
- mangled.append ("_");
+ if (punycode[0] == '_')
+ mangled += "_";
- mangled.append (identifier);
+ mangled += punycode;
}
static std::string
@@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
std::string mangled;
// FIXME: Add real algorithm once all pieces are implemented
- auto ty_prefix = v0_type_prefix (ty);
v0_add_identifier (mangled, crate_name);
v0_add_disambiguator (mangled, 62);
+ auto ty_prefix = v0_type_prefix (ty);
rust_unreachable ();
}
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 8142aa0cf78..cd966dd8e5c 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -2543,8 +2543,9 @@ Lexer::start_line (int current_line, int current_column)
namespace selftest {
// Checks if `src` has the same contents as the given characters
-void
-assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
+static void
+assert_source_content (Rust::InputSource &src,
+ const std::vector<uint32_t> &expected)
{
Rust::Codepoint src_char = src.next ();
for (auto expected_char : expected)
@@ -2559,15 +2560,16 @@ assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
ASSERT_TRUE (src_char.is_eof ());
}
-void
-test_buffer_input_source (std::string str, std::vector<uint32_t> expected)
+static void
+test_buffer_input_source (std::string str,
+ const std::vector<uint32_t> &expected)
{
Rust::BufferInputSource source (str, 0);
assert_source_content (source, expected);
}
-void
-test_file_input_source (std::string str, std::vector<uint32_t> expected)
+static void
+test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
{
FILE *tmpf = tmpfile ();
// Moves to the first character
diff --git a/gcc/rust/resolve/rust-ast-resolve-toplevel.h b/gcc/rust/resolve/rust-ast-resolve-toplevel.h
index 2ef3e7530c7..88d034b7869 100644
--- a/gcc/rust/resolve/rust-ast-resolve-toplevel.h
+++ b/gcc/rust/resolve/rust-ast-resolve-toplevel.h
@@ -430,7 +430,7 @@ public:
}
else
{
- CrateNum found_crate_num = UNKNOWN_CREATENUM;
+ CrateNum found_crate_num = UNKNOWN_CRATENUM;
bool found
= mappings->lookup_crate_name (extern_crate.get_referenced_crate (),
found_crate_num);
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index bb6cf4c9cca..1c5d72906d8 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -979,7 +979,7 @@ NodeId
Session::load_extern_crate (const std::string &crate_name, location_t locus)
{
// has it already been loaded?
- CrateNum found_crate_num = UNKNOWN_CREATENUM;
+ CrateNum found_crate_num = UNKNOWN_CRATENUM;
bool found = mappings->lookup_crate_name (crate_name, found_crate_num);
if (found)
{
diff --git a/gcc/rust/util/rust-canonical-path.h b/gcc/rust/util/rust-canonical-path.h
index d14b43e009a..a524feaea1a 100644
--- a/gcc/rust/util/rust-canonical-path.h
+++ b/gcc/rust/util/rust-canonical-path.h
@@ -58,7 +58,7 @@ public:
{
rust_assert (!path.empty ());
return CanonicalPath ({std::pair<NodeId, std::string> (id, path)},
- UNKNOWN_CREATENUM);
+ UNKNOWN_CRATENUM);
}
static CanonicalPath
@@ -88,7 +88,7 @@ public:
static CanonicalPath create_empty ()
{
- return CanonicalPath ({}, UNKNOWN_CREATENUM);
+ return CanonicalPath ({}, UNKNOWN_CRATENUM);
}
bool is_empty () const { return segs.size () == 0; }
@@ -171,7 +171,7 @@ public:
CrateNum get_crate_num () const
{
- rust_assert (crate_num != UNKNOWN_CREATENUM);
+ rust_assert (crate_num != UNKNOWN_CRATENUM);
return crate_num;
}
diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc
index 647c29f3b2e..1f126c15304 100644
--- a/gcc/rust/util/rust-hir-map.cc
+++ b/gcc/rust/util/rust-hir-map.cc
@@ -29,7 +29,7 @@ namespace Analysis {
NodeMapping
NodeMapping::get_error ()
{
- return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
+ return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
UNKNOWN_LOCAL_DEFID);
}
@@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1;
static const HirId kDefaultCrateNumBegin = 0;
Mappings::Mappings ()
- : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM),
+ : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM),
hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin)
{
Analysis::NodeMapping node (0, 0, 0, 0);
diff --git a/gcc/rust/util/rust-mapping-common.h b/gcc/rust/util/rust-mapping-common.h
index 7c0633eb18c..a51e4449c99 100644
--- a/gcc/rust/util/rust-mapping-common.h
+++ b/gcc/rust/util/rust-mapping-common.h
@@ -61,7 +61,7 @@ struct DefId
}
};
-#define UNKNOWN_CREATENUM ((uint32_t) (0))
+#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX))
#define UNKNOWN_NODEID ((uint32_t) (0))
#define UNKNOWN_HIRID ((uint32_t) (0))
#define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2024-01-16 18:08 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-16 18:08 [gcc r14-7933] gccrs: Add punycode encoding to v0 mangling Arthur Cohen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).