diff --git a/hilti/runtime/include/util.h b/hilti/runtime/include/util.h index 526b823a5..ca2b3843b 100644 --- a/hilti/runtime/include/util.h +++ b/hilti/runtime/include/util.h @@ -286,7 +286,7 @@ constexpr auto enumerate(T&& iterable) { * @param str string to expand * @return A UTF8 string with escape sequences expanded */ -std::string expandEscapes(std::string s); +std::string expandUTF8Escapes(std::string s); namespace render_style { @@ -308,7 +308,7 @@ enum class Bytes { * corresponding C-style control escapes (e.g., `\n`, `\0`), and escapes any * backslashes with a second backslash. Any specified flags modify the default * style accordingly. If not otherwise noted, any escapings are reversible - * through `expandEscapes()`. + * through `expandUTF8Escapes()`. */ enum class UTF8 { Default = 0, /**< name for unmodified default style */ diff --git a/hilti/runtime/src/tests/util.cc b/hilti/runtime/src/tests/util.cc index 17e48b698..13bce427e 100644 --- a/hilti/runtime/src/tests/util.cc +++ b/hilti/runtime/src/tests/util.cc @@ -256,46 +256,46 @@ TEST_CASE("escapeUTF8") { } } -TEST_CASE("expandEscapes") { - CHECK_EQ(expandEscapes(""), ""); - CHECK_EQ(expandEscapes("ab\n12"), "ab\n12"); - CHECK_EQ(expandEscapes("ab\\n12"), "ab\n12"); - CHECK_THROWS_WITH_AS(expandEscapes("ab\\\n12"), "unknown escape sequence", const Exception&); - CHECK_EQ(expandEscapes("ab\\\\n12"), "ab\\n12"); - CHECK_EQ(expandEscapes("ab\\\\\n12"), "ab\\\n12"); - - CHECK_THROWS_WITH_AS(expandEscapes("\\"), "broken escape sequence", const Exception&); - - CHECK_EQ(expandEscapes("\\\""), "\""); - CHECK_EQ(expandEscapes("\\r"), "\r"); - CHECK_EQ(expandEscapes("\\n"), "\n"); - CHECK_EQ(expandEscapes("\\t"), "\t"); - CHECK_EQ(expandEscapes("\\0"), std::string(1U, '\0')); - CHECK_EQ(expandEscapes("\\a"), "\a"); - CHECK_EQ(expandEscapes("\\b"), "\b"); - CHECK_EQ(expandEscapes("\\v"), "\v"); - CHECK_EQ(expandEscapes("\\f"), "\f"); - CHECK_EQ(expandEscapes("\\e"), "\e"); - - CHECK_THROWS_WITH_AS(expandEscapes("\\uFOO"), "incomplete unicode \\u", const Exception&); - CHECK_THROWS_WITH_AS(expandEscapes("\\uFOOL"), "cannot decode character", const Exception&); - CHECK_EQ(expandEscapes("\\u2614"), "☔"); +TEST_CASE("expandUTF8Escapes") { + CHECK_EQ(expandUTF8Escapes(""), ""); + CHECK_EQ(expandUTF8Escapes("ab\n12"), "ab\n12"); + CHECK_EQ(expandUTF8Escapes("ab\\n12"), "ab\n12"); + CHECK_THROWS_WITH_AS(expandUTF8Escapes("ab\\\n12"), "unknown escape sequence", const Exception&); + CHECK_EQ(expandUTF8Escapes("ab\\\\n12"), "ab\\n12"); + CHECK_EQ(expandUTF8Escapes("ab\\\\\n12"), "ab\\\n12"); + + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\"), "broken escape sequence", const Exception&); + + CHECK_EQ(expandUTF8Escapes("\\\""), "\""); + CHECK_EQ(expandUTF8Escapes("\\r"), "\r"); + CHECK_EQ(expandUTF8Escapes("\\n"), "\n"); + CHECK_EQ(expandUTF8Escapes("\\t"), "\t"); + CHECK_EQ(expandUTF8Escapes("\\0"), std::string(1U, '\0')); + CHECK_EQ(expandUTF8Escapes("\\a"), "\a"); + CHECK_EQ(expandUTF8Escapes("\\b"), "\b"); + CHECK_EQ(expandUTF8Escapes("\\v"), "\v"); + CHECK_EQ(expandUTF8Escapes("\\f"), "\f"); + CHECK_EQ(expandUTF8Escapes("\\e"), "\e"); + + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\uFOO"), "incomplete unicode \\u", const Exception&); + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\uFOOL"), "cannot decode character", const Exception&); + CHECK_EQ(expandUTF8Escapes("\\u2614"), "☔"); // We assume a max value of \uFFFF so the following is expanded as `\u1F60` and `E`, not `😎`. - CHECK_EQ(expandEscapes("\\u1F60E"), "ὠE"); - - CHECK_THROWS_WITH_AS(expandEscapes("\\UFOO"), "incomplete unicode \\U", const Exception&); - CHECK_THROWS_WITH_AS(expandEscapes("\\UFOOBAR"), "incomplete unicode \\U", const Exception&); - CHECK_THROWS_WITH_AS(expandEscapes("\\UFOOBARBAZ"), "cannot decode character", const Exception&); - CHECK_EQ(expandEscapes("\\U00002614"), "☔"); - CHECK_EQ(expandEscapes("\\U0001F60E"), "😎"); - - CHECK_THROWS_WITH_AS(expandEscapes("\\x"), "\\x used with no following hex digits", const Exception&); - CHECK_THROWS_WITH_AS(expandEscapes("\\xZ"), "cannot decode character", const Exception&); - CHECK_EQ(expandEscapes("\\xA"), "\xA"); - CHECK_EQ(expandEscapes("\\xAB"), "\xAB"); - CHECK_THROWS_WITH_AS(expandEscapes("\\xAZ"), "cannot decode character", const Exception&); - CHECK_EQ(expandEscapes("\\xABC"), std::string("\xAB") + "C"); - CHECK_EQ(expandEscapes("\\x01"), "\x01"); + CHECK_EQ(expandUTF8Escapes("\\u1F60E"), "ὠE"); + + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\UFOO"), "incomplete unicode \\U", const Exception&); + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\UFOOBAR"), "incomplete unicode \\U", const Exception&); + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\UFOOBARBAZ"), "cannot decode character", const Exception&); + CHECK_EQ(expandUTF8Escapes("\\U00002614"), "☔"); + CHECK_EQ(expandUTF8Escapes("\\U0001F60E"), "😎"); + + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\x"), "\\x used with no following hex digits", const Exception&); + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\xZ"), "cannot decode character", const Exception&); + CHECK_EQ(expandUTF8Escapes("\\xA"), "\xA"); + CHECK_EQ(expandUTF8Escapes("\\xAB"), "\xAB"); + CHECK_THROWS_WITH_AS(expandUTF8Escapes("\\xAZ"), "cannot decode character", const Exception&); + CHECK_EQ(expandUTF8Escapes("\\xABC"), std::string("\xAB") + "C"); + CHECK_EQ(expandUTF8Escapes("\\x01"), "\x01"); } TEST_CASE("getenv") { diff --git a/hilti/runtime/src/util.cc b/hilti/runtime/src/util.cc index 2b8305dd1..c2d6b56c2 100644 --- a/hilti/runtime/src/util.cc +++ b/hilti/runtime/src/util.cc @@ -173,7 +173,7 @@ std::pair hilti::rt::rsplit1(std::string s, const std: // In-place implementation copies chars shrinking escape sequences to binary. // Requires that binary results are not larger than their escape sequence. -std::string hilti::rt::expandEscapes(std::string s) { +std::string hilti::rt::expandUTF8Escapes(std::string s) { auto d = s.begin(); for ( auto c = d; c != s.end(); ) { if ( *c != '\\' ) { diff --git a/hilti/toolchain/include/base/util.h b/hilti/toolchain/include/base/util.h index ad7e549a3..679180297 100644 --- a/hilti/toolchain/include/base/util.h +++ b/hilti/toolchain/include/base/util.h @@ -378,8 +378,8 @@ using hilti::rt::render_style::UTF8; } // namespace render_style using hilti::rt::escapeBytes; -using hilti::rt::escapeUTF8; // NOLINT(misc-unused-using-decls) -using hilti::rt::expandEscapes; // NOLINT(misc-unused-using-decls) +using hilti::rt::escapeUTF8; // NOLINT(misc-unused-using-decls) +using hilti::rt::expandUTF8Escapes; // NOLINT(misc-unused-using-decls) /** * Wrapper for `escapeBytes` that produces a valid C++ string literal. diff --git a/hilti/toolchain/src/compiler/parser/scanner.ll b/hilti/toolchain/src/compiler/parser/scanner.ll index 4fdc05a85..6c6133030 100644 --- a/hilti/toolchain/src/compiler/parser/scanner.ll +++ b/hilti/toolchain/src/compiler/parser/scanner.ll @@ -33,9 +33,9 @@ static hilti::Meta toMeta(hilti::detail::parser::location l) { return hilti::Meta(hilti::Location(*l.begin.filename, l.begin.line, l.end.line, l.begin.column, l.end.column)); } -static std::string expandEscapes(detail::parser::Driver* driver, std::string s, hilti::detail::parser::location l) { +static std::string expandUTF8Escapes(detail::parser::Driver* driver, std::string s, hilti::detail::parser::location l) { try { - return hilti::util::expandEscapes(s); + return hilti::util::expandUTF8Escapes(s); } catch ( const hilti::rt::Exception& ) { driver->error("invalid escape sequence", toMeta(l)); return ""; @@ -206,8 +206,8 @@ True yylval->build(true); return token::CBOOL; '.' yylval->build(static_cast(*(yytext + 1))); return token::CUINTEGER; {decfloat}|{hexfloat} yylval->build(hilti::util::charsToDouble(yytext, range_error_real)); return token::CUREAL; -{string} yylval->build(expandEscapes(driver, std::string(yytext, 1, strlen(yytext) - 2), *yylloc)); return token::CSTRING; -b{string} yylval->build(expandEscapes(driver, std::string(yytext, 2, strlen(yytext) - 3), *yylloc)); return token::CBYTES; +{string} yylval->build(expandUTF8Escapes(driver, std::string(yytext, 1, strlen(yytext) - 2), *yylloc)); return token::CSTRING; +b{string} yylval->build(expandUTF8Escapes(driver, std::string(yytext, 2, strlen(yytext) - 3), *yylloc)); return token::CBYTES; {digits}\/(tcp|udp) yylval->build(std::string(yytext)); return token::CPORT; {address4} yylval->build(std::string(yytext)); return token::CADDRESS; {address6} yylval->build(std::string(yytext, 1, strlen(yytext) - 2)); return token::CADDRESS; diff --git a/spicy/toolchain/src/compiler/parser/scanner.ll b/spicy/toolchain/src/compiler/parser/scanner.ll index 119da6ef5..4a79225a5 100644 --- a/spicy/toolchain/src/compiler/parser/scanner.ll +++ b/spicy/toolchain/src/compiler/parser/scanner.ll @@ -37,9 +37,9 @@ static hilti::Meta toMeta(spicy::detail::parser::location l) { return hilti::Meta(hilti::Location(*l.begin.filename, l.begin.line, l.end.line, l.begin.column, l.end.column)); } -static std::string expandEscapes(Driver* driver, std::string s, spicy::detail::parser::location l) { +static std::string expandUTF8Escapes(Driver* driver, std::string s, spicy::detail::parser::location l) { try { - return hilti::util::expandEscapes(s); + return hilti::util::expandUTF8Escapes(s); } catch ( const hilti::rt::Exception& ) { driver->error("invalid escape sequence", toMeta(l)); return ""; @@ -238,9 +238,9 @@ Null return token::CNULL; {address6} yylval->build(std::string(yytext, 1, strlen(yytext) - 2)); return token::CADDRESS; {digits}|0x{hexs} yylval->build(hilti::util::charsToUInt64(yytext, 0, range_error_int)); return token::CUINTEGER; -{string} yylval->build(expandEscapes(driver, std::string(yytext, 1, strlen(yytext) - 2), *yylloc)); return token::CSTRING; -b{string} yylval->build(expandEscapes(driver, std::string(yytext, 2, strlen(yytext) - 3), *yylloc)); return token::CBYTES; -error{string} yylval->build(expandEscapes(driver, std::string(yytext, 6, strlen(yytext) - 7), *yylloc)); return token::CERROR; +{string} yylval->build(expandUTF8Escapes(driver, std::string(yytext, 1, strlen(yytext) - 2), *yylloc)); return token::CSTRING; +b{string} yylval->build(expandUTF8Escapes(driver, std::string(yytext, 2, strlen(yytext) - 3), *yylloc)); return token::CBYTES; +error{string} yylval->build(expandUTF8Escapes(driver, std::string(yytext, 6, strlen(yytext) - 7), *yylloc)); return token::CERROR; '.' yylval->build(static_cast(*(yytext +1))); return token::CUINTEGER; {decfloat}|{hexfloat} yylval->build(hilti::util::charsToDouble(yytext, range_error_real)); return token::CUREAL;