Skip to content

Commit

Permalink
gccrs: Add punycode encoding to v0 mangling
Browse files Browse the repository at this point in the history
gcc/rust/ChangeLog:

	* backend/rust-mangle.cc (v0_add_identifier): Added punycode encoding
	(v0_mangle_item): Likewise.
	* lex/rust-lex.cc (assert_source_content): Make static and take
	expected by const reference.
	(test_buffer_input_source): Likewise.
	(test_file_input_source): Likewise.
	* resolve/rust-ast-resolve-toplevel.h: fix typo
	* rust-session-manager.cc (Session::load_extern_crate): fix typo
	* util/rust-canonical-path.h: fix typo
	* util/rust-hir-map.cc (NodeMapping::get_error): fix typo
	(Mappings::Mappings): fix typo
	* util/rust-mapping-common.h (UNKNOWN_CREATENUM): fix typo
	(UNKNOWN_CRATENUM): Change 0 to UINT32_MAX

Signed-off-by: Raiki Tamura <[email protected]>
  • Loading branch information
tamaroning authored and CohenArthur committed Aug 18, 2023
1 parent 879a62f commit b1dd53f
Show file tree
Hide file tree
Showing 7 changed files with 54 additions and 29 deletions.
53 changes: 38 additions & 15 deletions gcc/rust/backend/rust-mangle.cc
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
#include "rust-mangle.h"
#include "fnv-hash.h"
#include "optional.h"
#include "rust-base62.h"
#include "rust-unicode.h"
#include "optional.h"
#include "rust-diagnostics.h"
#include "rust-unicode.h"
#include "rust-punycode.h"

// FIXME: Rename those to legacy_*
static const std::string kMangledSymbolPrefix = "_ZN";
Expand Down Expand Up @@ -249,22 +252,42 @@ v0_add_disambiguator (std::string &mangled, uint64_t dis)
// Append the mangled form of IDENTIFIER to MANGLED.
//
// NOTE(review): this listing is a diff hunk rendered without +/- markers, so
// the ASCII-only statements (identifier.size (), identifier[0],
// mangled.append (identifier)) appear alongside the punycode-based statements
// that seem to replace them -- confirm which lines are live against the file.
static void
v0_add_identifier (std::string &mangled, const std::string &identifier)
{
// FIXME: gccrs cannot handle unicode identifiers yet, so we never have to
// create mangling for unicode values for now. However, this is handled
// by the v0 mangling scheme. The grammar for unicode identifier is
// contained in <undisambiguated-identifier>, right under the <identifier>
// one. If the identifier contains unicode values, then an extra "u" needs
// to be added to the mangling string and `punycode` must be used to encode
// the characters.

mangled += std::to_string (identifier.size ());

// The grammar for unicode identifier is contained in
// <undisambiguated-identifier>, right under the <identifier> one. If the
// identifier contains unicode values, then an extra "u" needs to be added to
// the mangling string and `punycode` must be used to encode the characters.
// Both conversions below are asserted to succeed; there is no fallback path.
tl::optional<Utf8String> uident_opt
= Utf8String::make_utf8_string (identifier);
rust_assert (uident_opt.has_value ());
tl::optional<std::string> punycode_opt
= encode_punycode (uident_opt.value ());
rust_assert (punycode_opt.has_value ());

// The identifier counts as non-ASCII as soon as one code point exceeds 127.
bool is_ascii_ident = true;
for (auto c : uident_opt.value ().get_chars ())
if (c.value > 127)
{
is_ascii_ident = false;
break;
}

std::string punycode = punycode_opt.value ();
// remove trailing hyphen
// NOTE(review): back () and the [0] accesses further down presume a
// non-empty string; nothing here checks for an empty identifier -- TODO
// confirm callers never pass one.
if (punycode.back () == '-')
punycode.pop_back ();
// replace hyphens in punycode with underscores
std::replace (punycode.begin (), punycode.end (), '-', '_');

// Only non-ASCII identifiers receive the "u" marker.
if (!is_ascii_ident)
mangled.append ("u");

// The emitted length is that of the transformed punycode string.
mangled += std::to_string (punycode.size ());
// If the first character of the identifier is a digit or an underscore, we
// add an extra underscore
// NOTE(review): only '_' is tested below; the digit case mentioned above is
// not checked in this block -- confirm whether that is intended.
if (identifier[0] == '_')
mangled.append ("_");
if (punycode[0] == '_')
mangled += "_";

mangled.append (identifier);
mangled += punycode;
}

static std::string
Expand Down Expand Up @@ -300,9 +323,9 @@ v0_mangle_item (const TyTy::BaseType *ty, const Resolver::CanonicalPath &path)

std::string mangled;
// FIXME: Add real algorithm once all pieces are implemented
auto ty_prefix = v0_type_prefix (ty);
v0_add_identifier (mangled, crate_name);
v0_add_disambiguator (mangled, 62);
auto ty_prefix = v0_type_prefix (ty);

rust_unreachable ();
}
Expand Down
14 changes: 8 additions & 6 deletions gcc/rust/lex/rust-lex.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2543,8 +2543,9 @@ Lexer::start_line (int current_line, int current_column)
namespace selftest {

// Checks if `src` has the same contents as the given characters
void
assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
static void
assert_source_content (Rust::InputSource &src,
const std::vector<uint32_t> &expected)
{
Rust::Codepoint src_char = src.next ();
for (auto expected_char : expected)
Expand All @@ -2559,15 +2560,16 @@ assert_source_content (Rust::InputSource &src, std::vector<uint32_t> expected)
ASSERT_TRUE (src_char.is_eof ());
}

void
test_buffer_input_source (std::string str, std::vector<uint32_t> expected)
static void
test_buffer_input_source (std::string str,
const std::vector<uint32_t> &expected)
{
Rust::BufferInputSource source (str, 0);
assert_source_content (source, expected);
}

void
test_file_input_source (std::string str, std::vector<uint32_t> expected)
static void
test_file_input_source (std::string str, const std::vector<uint32_t> &expected)
{
FILE *tmpf = tmpfile ();
// Moves to the first character
Expand Down
2 changes: 1 addition & 1 deletion gcc/rust/resolve/rust-ast-resolve-toplevel.h
Original file line number Diff line number Diff line change
Expand Up @@ -430,7 +430,7 @@ class ResolveTopLevel : public ResolverBase
}
else
{
CrateNum found_crate_num = UNKNOWN_CREATENUM;
CrateNum found_crate_num = UNKNOWN_CRATENUM;
bool found
= mappings->lookup_crate_name (extern_crate.get_referenced_crate (),
found_crate_num);
Expand Down
2 changes: 1 addition & 1 deletion gcc/rust/rust-session-manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ NodeId
Session::load_extern_crate (const std::string &crate_name, location_t locus)
{
// has it already been loaded?
CrateNum found_crate_num = UNKNOWN_CREATENUM;
CrateNum found_crate_num = UNKNOWN_CRATENUM;
bool found = mappings->lookup_crate_name (crate_name, found_crate_num);
if (found)
{
Expand Down
6 changes: 3 additions & 3 deletions gcc/rust/util/rust-canonical-path.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class CanonicalPath
{
rust_assert (!path.empty ());
return CanonicalPath ({std::pair<NodeId, std::string> (id, path)},
UNKNOWN_CREATENUM);
UNKNOWN_CRATENUM);
}

static CanonicalPath
Expand Down Expand Up @@ -88,7 +88,7 @@ class CanonicalPath

static CanonicalPath create_empty ()
{
return CanonicalPath ({}, UNKNOWN_CREATENUM);
return CanonicalPath ({}, UNKNOWN_CRATENUM);
}

bool is_empty () const { return segs.size () == 0; }
Expand Down Expand Up @@ -171,7 +171,7 @@ class CanonicalPath

CrateNum get_crate_num () const
{
rust_assert (crate_num != UNKNOWN_CREATENUM);
rust_assert (crate_num != UNKNOWN_CRATENUM);
return crate_num;
}

Expand Down
4 changes: 2 additions & 2 deletions gcc/rust/util/rust-hir-map.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace Analysis {
NodeMapping
NodeMapping::get_error ()
{
return NodeMapping (UNKNOWN_CREATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
return NodeMapping (UNKNOWN_CRATENUM, UNKNOWN_NODEID, UNKNOWN_HIRID,
UNKNOWN_LOCAL_DEFID);
}

Expand Down Expand Up @@ -94,7 +94,7 @@ static const HirId kDefaultHirIdBegin = 1;
static const HirId kDefaultCrateNumBegin = 0;

Mappings::Mappings ()
: crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CREATENUM),
: crateNumItr (kDefaultCrateNumBegin), currentCrateNum (UNKNOWN_CRATENUM),
hirIdIter (kDefaultHirIdBegin), nodeIdIter (kDefaultNodeIdBegin)
{
Analysis::NodeMapping node (0, 0, 0, 0);
Expand Down
2 changes: 1 addition & 1 deletion gcc/rust/util/rust-mapping-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct DefId
}
};

#define UNKNOWN_CREATENUM ((uint32_t) (0))
#define UNKNOWN_CRATENUM ((uint32_t) (UINT32_MAX))
#define UNKNOWN_NODEID ((uint32_t) (0))
#define UNKNOWN_HIRID ((uint32_t) (0))
#define UNKNOWN_LOCAL_DEFID ((uint32_t) (0))
Expand Down

0 comments on commit b1dd53f

Please sign in to comment.