Skip to content

Commit

Permalink
Forbid bidirectional flow control characters in literals
Browse files Browse the repository at this point in the history
Forbid directional formatting characters from
[UAX #9](https://www.unicode.org/reports/tr9/#Directional_Formatting_Characters)
in literals to fix #5047.
This is similar to rustc's `text_direction_codepoint_in_literal` lint.
Such characters are already implicitly forbidden in other parts of the syntax.
  • Loading branch information
IGI-111 committed Sep 26, 2023
1 parent 8b3ed78 commit 0f6e776
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 3 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions sway-error/src/lex_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ pub enum LexErrorKind {
UnicodeEscapeOutOfRange { position: usize },
#[error("unicode escape represents an invalid char value")]
UnicodeEscapeInvalidCharValue { span: Span },
#[error("unicode text direction codepoint in literal")]
UnicodeTextDirInLiteral { position: usize, character: char },
#[error("invalid escape code")]
InvalidEscapeCode { position: usize },
#[error("invalid u256. Only hex literals are supported")]
Expand Down
1 change: 1 addition & 0 deletions sway-parse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ sway-ast = { version = "0.46.0", path = "../sway-ast" }
sway-error = { version = "0.46.0", path = "../sway-error" }
sway-types = { version = "0.46.0", path = "../sway-types" }
thiserror = "1.0"
unicode-bidi = "0.3.13"
unicode-xid = "0.2.2"

[dev-dependencies]
Expand Down
72 changes: 69 additions & 3 deletions sway-parse/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use sway_types::{
ast::{Delimiter, PunctKind},
Ident, SourceId, Span, Spanned,
};
use unicode_bidi::format_chars::{ALM, FSI, LRE, LRI, LRM, LRO, PDF, PDI, RLE, RLI, RLM, RLO};
use unicode_xid::UnicodeXID;

#[extension_trait]
Expand Down Expand Up @@ -465,14 +466,24 @@ fn lex_string(
},
)
};
let (_, next_character) = l
let (next_index, next_character) = l
.stream
.next()
.ok_or_else(|| unclosed_string_lit(l, l.src.len() - 1))?;
parsed.push(match next_character {
'\\' => parse_escape_code(l)
.map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.len())))?,
'"' => break,
// do not allow text direction codepoints
ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO => {
let kind = LexErrorKind::UnicodeTextDirInLiteral {
position: next_index,
character: next_character,
};
let span = span_one(l, next_index, next_character);
error(l.handler, LexError { span, kind });
continue;
}
_ => next_character,
});
}
Expand Down Expand Up @@ -507,7 +518,17 @@ fn lex_char(
}
};

let (_, next_char) = next(l)?;
let (next_index, next_char) = next(l)?;
// do not allow text direction codepoints
if let ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO = next_char {
let kind = LexErrorKind::UnicodeTextDirInLiteral {
position: next_index,
character: next_char,
};
let span = span_one(l, next_index, next_char);
error(l.handler, LexError { span, kind });
}

let parsed = escape(l, next_char)?;

// Consume the closing `'`.
Expand Down Expand Up @@ -817,7 +838,52 @@ mod tests {
TokenTree,
},
};
use sway_error::handler::Handler;
use sway_error::{
error::CompileError,
handler::Handler,
lex_error::{LexError, LexErrorKind},
};

/// Lexing must report one `UnicodeTextDirInLiteral` error for every
/// bidirectional formatting codepoint found inside a string or char literal.
#[test]
fn lex_bidi() {
    // Sway program carrying four directional codepoints inside one string
    // literal (U+202E, U+2066, U+2069, U+2066) and one inside a char
    // literal (U+202E): five offending characters in total.
    let source = "
script;
use std::string::String;
fn main() {
let a = String::from_ascii_str(\"fuel\");
let b = String::from_ascii_str(\"fuel\u{202E}\u{2066}// Same string again\u{2069}\u{2066}\");
if a.as_bytes() == b.as_bytes() {
log(\"same\");
} else {
log(\"different\");
}
let lrm = '\u{202E}';
log(lrm);
}
";
    let handler = Handler::default();

    // Lex the whole input, from byte 0 to its end, with no source path.
    // Lexing itself is expected to succeed; the violations are only
    // recorded on the handler.
    let _tokens = lex_commented(&handler, &Arc::from(source), 0, source.len(), &None).unwrap();

    let (errors, warnings) = handler.consume();
    assert_eq!(warnings.len(), 0);
    // One error per offending codepoint.
    assert_eq!(errors.len(), 5);

    // Every reported error must be the text-direction lex error.
    errors.into_iter().for_each(|reported| {
        assert_matches!(
            reported,
            CompileError::Lex {
                error: LexError {
                    kind: LexErrorKind::UnicodeTextDirInLiteral { .. },
                    ..
                }
            }
        );
    });
}

#[test]
fn lex_commented_token_stream() {
Expand Down

0 comments on commit 0f6e776

Please sign in to comment.