From ca730f1b0c6284afe040b6926adc0a0d250e063c Mon Sep 17 00:00:00 2001 From: Raiki Tamura Date: Fri, 18 Aug 2023 13:00:47 +0900 Subject: [PATCH] gccrs: Add check for no_mangle attribute gcc/rust/ChangeLog: * lex/rust-input-source.h: Move constants from here... * util/rust-codepoint.h (struct Codepoint): ... to here * util/rust-attributes.cc (AttributeChecker::visit): Add check * util/rust-unicode.cc (is_ascii_only): New function. * util/rust-unicode.h (is_ascii_only): Likewise. * backend/rust-mangle.cc (legacy_mangle_name): Use it. * util/rust-punycode.cc (extract_basic_string): Likewise. * lex/rust-lex.cc (Lexer::parse_byte_char): Likewise. Signed-off-by: Raiki Tamura --- gcc/rust/backend/rust-mangle.cc | 2 +- gcc/rust/lex/rust-input-source.h | 3 --- gcc/rust/lex/rust-lex.cc | 2 +- gcc/rust/util/rust-attributes.cc | 11 +++++++++++ gcc/rust/util/rust-codepoint.h | 4 ++++ gcc/rust/util/rust-punycode.cc | 2 +- gcc/rust/util/rust-unicode.cc | 10 ++++++++++ gcc/rust/util/rust-unicode.h | 3 +++ 8 files changed, 31 insertions(+), 6 deletions(-) diff --git a/gcc/rust/backend/rust-mangle.cc b/gcc/rust/backend/rust-mangle.cc index 62530d65382f..ca47ce9fa102 100644 --- a/gcc/rust/backend/rust-mangle.cc +++ b/gcc/rust/backend/rust-mangle.cc @@ -87,7 +87,7 @@ legacy_mangle_name (const std::string &name) i++; m = ".."; } - else if (c.value < 0x80) + else if (c.is_ascii ()) // ASCII m.push_back (c.value); else diff --git a/gcc/rust/lex/rust-input-source.h b/gcc/rust/lex/rust-input-source.h index 32261a05cae3..03bf43b5b3b4 100644 --- a/gcc/rust/lex/rust-input-source.h +++ b/gcc/rust/lex/rust-input-source.h @@ -28,9 +28,6 @@ constexpr uint8_t UTF8_BOM1 = 0xEF; constexpr uint8_t UTF8_BOM2 = 0xBB; constexpr uint8_t UTF8_BOM3 = 0xBF; -constexpr uint32_t MAX_ASCII_CODEPOINT = 0x7F; -constexpr uint32_t CODEPOINT_INVALID = 0xFFFE; - // Input source wrapper thing. 
class InputSource { diff --git a/gcc/rust/lex/rust-lex.cc b/gcc/rust/lex/rust-lex.cc index 2a9246535298..c81d7cc66f33 100644 --- a/gcc/rust/lex/rust-lex.cc +++ b/gcc/rust/lex/rust-lex.cc @@ -1734,7 +1734,7 @@ Lexer::parse_byte_char (location_t loc) // otherwise, get character from direct input character byte_char = current_char; - if (byte_char.value > 0x7f) + if (!byte_char.is_ascii ()) { rust_error_at (get_current_location (), "non-ASCII character in %<byte char%>"); diff --git a/gcc/rust/util/rust-attributes.cc b/gcc/rust/util/rust-attributes.cc index fcab9208cf6f..87ac7f31f9aa 100644 --- a/gcc/rust/util/rust-attributes.cc +++ b/gcc/rust/util/rust-attributes.cc @@ -22,6 +22,7 @@ #include "rust-ast.h" #include "rust-ast-full.h" #include "rust-diagnostics.h" +#include "rust-unicode.h" namespace Rust { namespace Analysis { @@ -623,6 +624,12 @@ AttributeChecker::visit (AST::Function &fun) name); }; + auto check_function_name = [] (const char *name, const Identifier &ident) { + if (!is_ascii_only (ident.as_string ())) + rust_error_at (ident.get_locus (), + "the %<#[%s]%> attribute requires ASCII identifier", name); + }; + BuiltinAttrDefinition result; for (auto &attribute : fun.get_outer_attrs ()) { @@ -649,6 +656,10 @@ AttributeChecker::visit (AST::Function &fun) { check_crate_type (name, attribute); } + else if (result.name == "no_mangle") + { + check_function_name (name, fun.get_function_name ()); + } } fun.get_definition ()->accept_vis (*this); } diff --git a/gcc/rust/util/rust-codepoint.h b/gcc/rust/util/rust-codepoint.h index eaed664be944..425d364b1e14 100644 --- a/gcc/rust/util/rust-codepoint.h +++ b/gcc/rust/util/rust-codepoint.h @@ -23,6 +23,9 @@ namespace Rust { +constexpr uint32_t MAX_ASCII_CODEPOINT = 0x7F; +constexpr uint32_t CODEPOINT_INVALID = 0xFFFE; + // FIXME: move this to rust-unicode.h? 
struct Codepoint { @@ -36,6 +39,7 @@ struct Codepoint static Codepoint eof () { return Codepoint (UINT32_MAX); } bool is_eof () const { return value == UINT32_MAX; } + bool is_ascii () const { return value <= MAX_ASCII_CODEPOINT; } // Returns a C++ string containing string value of codepoint. std::string as_string (); diff --git a/gcc/rust/util/rust-punycode.cc b/gcc/rust/util/rust-punycode.cc index 8da724cc65e9..a9c8cbfc5fca 100644 --- a/gcc/rust/util/rust-punycode.cc +++ b/gcc/rust/util/rust-punycode.cc @@ -42,7 +42,7 @@ extract_basic_string (const std::vector<Codepoint> &src) std::string basic_string; for (auto c : src) { - if (c.value <= 0x7F) + if (c.is_ascii ()) basic_string += c.as_string (); } return basic_string; diff --git a/gcc/rust/util/rust-unicode.cc b/gcc/rust/util/rust-unicode.cc index 95653cb760db..999ecb042ca3 100644 --- a/gcc/rust/util/rust-unicode.cc +++ b/gcc/rust/util/rust-unicode.cc @@ -16,6 +16,7 @@ // along with GCC; see the file COPYING3. If not see // <http://www.gnu.org/licenses/>. +#include "rust-input-source.h" #include "rust-system.h" #include "optional.h" #include "selftest.h" @@ -328,6 +329,15 @@ is_numeric (uint32_t codepoint) return true; } +bool +is_ascii_only (const std::string &str) +{ + for (char c : str) + if (static_cast<uint32_t> (c) > MAX_ASCII_CODEPOINT) + return false; + return true; +} + } // namespace Rust #if CHECKING_P diff --git a/gcc/rust/util/rust-unicode.h b/gcc/rust/util/rust-unicode.h index becf6fb6a0c7..2538436797fb 100644 --- a/gcc/rust/util/rust-unicode.h +++ b/gcc/rust/util/rust-unicode.h @@ -62,6 +62,9 @@ class Utf8String bool is_alphabetic (uint32_t codepoint); +bool +is_ascii_only (const std::string &str); + bool is_numeric (uint32_t codepoint);