diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc index 62530d65382f..248d69b72a72 100644 --- a/gcc/rust/backend/rust-mangle.cc +++ b/gcc/rust/backend/rust-mangle.cc @@ -1,8 +1,11 @@ #include "rust-mangle.h" #include "fnv-hash.h" +#include "optional.h" #include "rust-base62.h" #include "rust-unicode.h" -#include "optional.h" +#include "rust-diagnostics.h" +#include "rust-unicode.h" +#include "rust-punycode.h" // FIXME: Rename those to legacy_* static const std::string kMangledSymbolPrefix = "_ZN"; @@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis) static void v0_add_identifier (std::string &mangled, const std::string &identifier) { - // FIXME: gccrs cannot handle unicode identifiers yet, so we never have to - // create mangling for unicode values for now. However, this is handled - // by the v0 mangling scheme. The grammar for unicode identifier is - // contained in <undisambiguated-identifier>, right under the <identifier> - // one. If the identifier contains unicode values, then an extra "u" needs - // to be added to the mangling string and `punycode` must be used to encode - // the characters. - - mangled += std::to_string (identifier.size ()); - + // The grammar for unicode identifier is contained in + // <undisambiguated-identifier>, right under the <identifier> one. If the + // identifier contains unicode values, then an extra "u" needs to be added to + // the mangling string and `punycode` must be used to encode the characters.
+ tl::optional uident_opt + = Utf8String::make_utf8_string (identifier); + rust_assert (uident_opt.has_value ()); + tl::optional punycode_opt + = encode_punycode (uident_opt.value ()); + rust_assert (punycode_opt.has_value ()); + + bool is_ascii_ident = true; + for (auto c : uident_opt.value ().get_chars ()) + if (c.value > 127) + { + is_ascii_ident = false; + break; + } + + std::string punycode = punycode_opt.value (); + // remove trailing hyphen + if (punycode.back () == '-') + punycode.pop_back (); + // replace hyphens in punycode with underscores + std::replace (punycode.begin (), punycode.end (), '-', '_'); + + if (!is_ascii_ident) + mangled.append ("u"); + + mangled += std::to_string (punycode.size ()); // If the first character of the identifier is a digit or an underscore, we // add an extra underscore - if (identifier[0] == '_') - mangled.append ("_"); + if (punycode[0] == '_') + mangled += "_"; - mangled.append (identifier); + mangled += punycode; } static std::string @@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path) std::string mangled; // FIXME: Add real algorithm once all pieces are implemented - auto ty_prefix = v0_type_prefix (ty); v0_add_identifier (mangled, crate_name); v0_add_disambiguator (mangled, 62); + auto ty_prefix = v0_type_prefix (ty); rust_unreachable (); } diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 0798be38744c..1afcd016bd36 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -2543,8 +2543,9 @@ Lexer::start_line (int current_line, int current_column) namespace selftest { // Checks if `src` has the same contents as the given characters -void -assert_source_content (Rust::InputSource &src, std::vector expected) +static void +assert_source_content (Rust::InputSource &src, + const std::vector &expected) { Rust::Codepoint src_char = src.next (); for (auto expected_char : expected) @@ -2559,15 +2560,16 @@ assert_source_content (Rust::InputSource
&src, std::vector expected) ASSERT_TRUE (src_char.is_eof ()); } -void -test_buffer_input_source (std::string str, std::vector expected) +static void +test_buffer_input_source (std::string str, + const std::vector &expected) { Rust::BufferInputSource source (str, 0); assert_source_content (source, expected); } -void -test_file_input_source (std::string str, std::vector expected) +static void +test_file_input_source (std::string str, const std::vector &expected) { FILE *tmpf = tmpfile (); // Moves to the first character diff --git a/gcc/rust/resolve/rust-ast-resolve-toplevel.h b/gcc/rust/resolve/rust-ast-resolve-toplevel.h index 12b7103287ce..9ba8bdb5caff 100644 --- a/gcc/rust/resolve/rust-ast-resolve-toplevel.h +++ b/gcc/rust/resolve/rust-ast-resolve-toplevel.h @@ -430,7 +430,7 @@ class ResolveTopLevel : public ResolverBase } else { - CrateNum found_crate_num = UNKNOWN_CREATENUM; + CrateNum found_crate_num = UNKNOWN_CRATENUM; bool found = mappings->lookup_crate_name (extern_crate.get_referenced_crate (), found_crate_num); diff --git a/gcc/rust/rust-session-manager.cc b/gcc/rust/rust-session-manager.cc index 18e7ea25a06d..abf127227588 100644 --- a/gcc/rust/rust-session-manager.cc +++ b/gcc/rust/rust-session-manager.cc @@ -979,7 +979,7 @@ NodeId Session::load_extern_crate (const std::string &crate_name, location_t locus) { // has it already been loaded? 
- CrateNum found_crate_num = UNKNOWN_CREATENUM; + CrateNum found_crate_num = UNKNOWN_CRATENUM; bool found = mappings->lookup_crate_name (crate_name, found_crate_num); if (found) { diff --git a/gcc/rust/util/rust-canonical-path.h b/gcc/rust/util/rust-canonical-path.h index b168c937facd..2f28302c1c14 100644 --- a/gcc/rust/util/rust-canonical-path.h +++ b/gcc/rust/util/rust-canonical-path.h @@ -58,7 +58,7 @@ class CanonicalPath { rust_assert (!path.empty ()); return CanonicalPath ({std::pair (id, path)}, - UNKNOWN_CREATENUM); + UNKNOWN_CRATENUM); } static CanonicalPath @@ -88,7 +88,7 @@ class CanonicalPath static CanonicalPath create_empty () { - return CanonicalPath ({}, UNKNOWN_CREATENUM); + return CanonicalPath ({}, UNKNOWN_CRATENUM); } bool is_empty () const { return segs.size () == 0; } @@ -171,7 +171,7 @@ class CanonicalPath CrateNum get_crate_num () const { - rust_assert (crate_num != UNKNOWN_CREATENUM); + rust_assert (crate_num != UNKNOWN_CRATENUM); return crate_num; } diff --git a/gcc/rust/util/rust-hir-map.cc b/gcc/rust/util/rust-hir-map.cc index cf907e935674..62e8c7cfb69e 100644 --- a/gcc/rust/util/rust-hir-map.cc +++ b/gcc/rust/util/rust-hir-map.cc @@ -29,7 +29,7 @@ namespace Analysis { NodeMapping NodeMapping::get_error () { - return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID, + return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID, UNKNOWN_LOCAL_DEFID); } @@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1; static const HirId kDefaultCrateNumBegin = 0; Mappings::Mappings () - : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM), + : crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM), hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin) { Analysis::NodeMapping node (0, 0, 0, 0); diff --git a/gcc/rust/util/rust-mapping-common.h b/gcc/rust/util/rust-mapping-common.h index d18dab5385c7..93df863bf673 100644 --- a/gcc/rust/util/rust-mapping-common.h +++ 
b/gcc/rust/util/rust-mapping-common.h @@ -61,7 +61,7 @@ struct DefId } }; -#define UNKNOWN_CREATENUM ((uint32_t) (0)) +#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX)) #define UNKNOWN_NODEID ((uint32_t) (0)) #define UNKNOWN_HIRID ((uint32_t) (0)) #define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))