public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-7933] gccrs: Add punycode encoding to v0 mangling
@ 2024-01-16 18:08 Arthur Cohen
  0 siblings, 0 replies; only message in thread
From: Arthur Cohen @ 2024-01-16 18:08 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:10da25cd81461c9e18a2b4e6c44a83a74e6c0e2d

commit r14-7933-g10da25cd81461c9e18a2b4e6c44a83a74e6c0e2d
Author: Raiki Tamura <tamaron1203@gmail.com>
Date:   Tue Aug 8 02:08:38 2023 +0900

    gccrs: Add punycode encoding to v0 mangling
    
    gcc/rust/ChangeLog:
    
            * backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding
            (v0_mangle_item): Likewise.
            * lex/rust-lex.cc (assert_source_content): Change type
            (test_buffer_input_source): Change type
            (test_file_input_source): Change type
            * resolve/rust-ast-resolve-toplevel.h: fix typo
            * rust-session-manager.cc (Session::load_extern_crate): fix typo
            * util/rust-canonical-path.h: fix typo
            * util/rust-hir-map.cc (NodeMapping::get_error): fix typo
            (Mappings::Mappings): fix typo
            * util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
            (UNKNOWN_CRATENUM): Change 0 to UINT32_MAX
    
    Signed-off-by: Raiki Tamura <tamaron1203@gmail.com>

Diff:
---
 gcc/rust/backend/rust-mangle.cc              | 53 ++++++++++++++++++++--------
 gcc/rust/lex/rust-lex.cc                     | 14 ++++----
 gcc/rust/resolve/rust-ast-resolve-toplevel.h |  2 +-
 gcc/rust/rust-session-manager.cc             |  2 +-
 gcc/rust/util/rust-canonical-path.h          |  6 ++--
 gcc/rust/util/rust-hir-map.cc                |  4 +--
 gcc/rust/util/rust-mapping-common.h          |  2 +-
 7 files changed, 54 insertions(+), 29 deletions(-)

diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc
index 62530d65382..248d69b72a7 100644
--- a/gcc/rust/backend/rust-mangle.cc
+++ b/gcc/rust/backend/rust-mangle.cc
@@ -1,8 +1,11 @@
 #include "rust-mangle.h"
 #include "fnv-hash.h"
+#include "optional.h"
 #include "rust-base62.h"
 #include "rust-unicode.h"
-#include "optional.h"
+#include "rust-diagnostics.h"
+#include "rust-unicode.h"
+#include "rust-punycode.h"
 
 // FIXME: Rename those to legacy_*
 static const std::string kMangledSymbolPrefix = "_ZN";
@@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
 static void
 v0_add_identifier (std::string &mangled, const std::string &identifier)
 {
-  // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
-  // create mangling for unicode values for now. However, this is handled
-  // by the v0 mangling scheme. The grammar for unicode identifier is
-  // contained in <undisambiguated-identifier>, right under the <identifier>
-  // one. If the identifier contains unicode values, then an extra "u" needs
-  // to be added to the mangling string and `punycode` must be used to encode
-  // the characters.
-
-  mangled += std::to_string (identifier.size ());
-
+  // The grammar for unicode identifier is contained in
+  // <undisambiguated-identifier>, right under the <identifier> one. If the
+  // identifier contains unicode values, then an extra "u" needs to be added to
+  // the mangling string and `punycode` must be used to encode the characters.
+  tl::optional<Utf8String> uident_opt
+    = Utf8String::make_utf8_string (identifier);
+  rust_assert (uident_opt.has_value ());
+  tl::optional<std::string> punycode_opt
+    = encode_punycode (uident_opt.value ());
+  rust_assert (punycode_opt.has_value ());
+
+  bool is_ascii_ident = true;
+  for (auto c : uident_opt.value ().get_chars ())
+    if (c.value > 127)
+      {
+	is_ascii_ident = false;
+	break;
+      }
+
+  std::string punycode = punycode_opt.value ();
+  // remove trailing hyphen
+  if (punycode.back () == '-')
+    punycode.pop_back ();
+  // replace hyphens in punycode with underscores
+  std::replace (punycode.begin (), punycode.end (), '-', '_');
+
+  if (!is_ascii_ident)
+    mangled.append ("u");
+
+  mangled += std::to_string (punycode.size ());
   // If the first character of the identifier is a digit or an underscore, we
   // add an extra underscore
-  if (identifier[0] == '_')
-    mangled.append ("_");
+  if (punycode[0] == '_')
+    mangled += "_";
 
-  mangled.append (identifier);
+  mangled += punycode;
 }
 
 static std::string
@@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)
 
   std::string mangled;
   // FIXME: Add real algorithm once all pieces are implemented
-  auto ty_prefix = v0_type_prefix (ty);
   v0_add_identifier (mangled, crate_name);
   v0_add_disambiguator (mangled, 62);
+  auto ty_prefix = v0_type_prefix (ty);
 
   rust_unreachable ();
 }
diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc
index 8142aa0cf78..cd966dd8e5c 100644
--- a/gcc/rust/lex/rust-lex.cc
+++ b/gcc/rust/lex/rust-lex.cc
@@ -2543,8 +2543,9 @@ Lexer::start_line (int current_line, int current_column)
 namespace selftest {
 
 // Checks if `src` has the same contents as the given characters
-void
-assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
+static void
+assert_source_content (Rust::InputSource &src,
+		       const std::vector<uint32_t> &expected)
 {
   Rust::Codepoint src_char = src.next ();
   for (auto expected_char : expected)
@@ -2559,15 +2560,16 @@ assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
   ASSERT_TRUE (src_char.is_eof ());
 }
 
-void
-test_buffer_input_source (std::string str, std::vector<uint32_t> expected)
+static void
+test_buffer_input_source (std::string str,
+			  const std::vector<uint32_t> &expected)
 {
   Rust::BufferInputSource source (str, 0);
   assert_source_content (source, expected);
 }
 
-void
-test_file_input_source (std::string str, std::vector<uint32_t> expected)
+static void
+test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
 {
   FILE *tmpf = tmpfile ();
   // Moves to the first character
diff --git a/gcc/rust/resolve/rust-ast-resolve-toplevel.h b/gcc/rust/resolve/rust-ast-resolve-toplevel.h
index 2ef3e7530c7..88d034b7869 100644
--- a/gcc/rust/resolve/rust-ast-resolve-toplevel.h
+++ b/gcc/rust/resolve/rust-ast-resolve-toplevel.h
@@ -430,7 +430,7 @@ public:
       }
     else
       {
-	CrateNum found_crate_num = UNKNOWN_CREATENUM;
+	CrateNum found_crate_num = UNKNOWN_CRATENUM;
 	bool found
 	  = mappings->lookup_crate_name (extern_crate.get_referenced_crate (),
 					 found_crate_num);
diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc
index bb6cf4c9cca..1c5d72906d8 100644
--- a/gcc/rust/rust-session-manager.cc
+++ b/gcc/rust/rust-session-manager.cc
@@ -979,7 +979,7 @@ NodeId
 Session::load_extern_crate (const std::string &crate_name, location_t locus)
 {
   // has it already been loaded?
-  CrateNum found_crate_num = UNKNOWN_CREATENUM;
+  CrateNum found_crate_num = UNKNOWN_CRATENUM;
   bool found = mappings->lookup_crate_name (crate_name, found_crate_num);
   if (found)
     {
diff --git a/gcc/rust/util/rust-canonical-path.h b/gcc/rust/util/rust-canonical-path.h
index d14b43e009a..a524feaea1a 100644
--- a/gcc/rust/util/rust-canonical-path.h
+++ b/gcc/rust/util/rust-canonical-path.h
@@ -58,7 +58,7 @@ public:
   {
     rust_assert (!path.empty ());
     return CanonicalPath ({std::pair<NodeId, std::string> (id, path)},
-			  UNKNOWN_CREATENUM);
+			  UNKNOWN_CRATENUM);
   }
 
   static CanonicalPath
@@ -88,7 +88,7 @@ public:
 
   static CanonicalPath create_empty ()
   {
-    return CanonicalPath ({}, UNKNOWN_CREATENUM);
+    return CanonicalPath ({}, UNKNOWN_CRATENUM);
   }
 
   bool is_empty () const { return segs.size () == 0; }
@@ -171,7 +171,7 @@ public:
 
   CrateNum get_crate_num () const
   {
-    rust_assert (crate_num != UNKNOWN_CREATENUM);
+    rust_assert (crate_num != UNKNOWN_CRATENUM);
     return crate_num;
   }
 
diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc
index 647c29f3b2e..1f126c15304 100644
--- a/gcc/rust/util/rust-hir-map.cc
+++ b/gcc/rust/util/rust-hir-map.cc
@@ -29,7 +29,7 @@ namespace Analysis {
 NodeMapping
 NodeMapping::get_error ()
 {
-  return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
+  return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
 		      UNKNOWN_LOCAL_DEFID);
 }
 
@@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1;
 static const HirId kDefaultCrateNumBegin = 0;
 
 Mappings::Mappings ()
-  : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM),
+  : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM),
     hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin)
 {
   Analysis::NodeMapping node (0, 0, 0, 0);
diff --git a/gcc/rust/util/rust-mapping-common.h b/gcc/rust/util/rust-mapping-common.h
index 7c0633eb18c..a51e4449c99 100644
--- a/gcc/rust/util/rust-mapping-common.h
+++ b/gcc/rust/util/rust-mapping-common.h
@@ -61,7 +61,7 @@ struct DefId
   }
 };
 
-#define UNKNOWN_CREATENUM ((uint32_t) (0))
+#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX))
 #define UNKNOWN_NODEID ((uint32_t) (0))
 #define UNKNOWN_HIRID ((uint32_t) (0))
 #define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-01-16 18:08 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-01-16 18:08 [gcc r14-7933] gccrs: Add punycode encoding to v0 mangling Arthur Cohen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).