Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forbid bidirectional flow control characters in literals #5146

Merged
merged 2 commits into from
Sep 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions sway-error/src/lex_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ pub enum LexErrorKind {
UnicodeEscapeOutOfRange { position: usize },
#[error("unicode escape represents an invalid char value")]
UnicodeEscapeInvalidCharValue { span: Span },
#[error("unicode text direction codepoint in literal")]
UnicodeTextDirInLiteral { position: usize, character: char },
#[error("invalid escape code")]
InvalidEscapeCode { position: usize },
#[error("invalid u256. Only hex literals are supported")]
Expand Down
1 change: 1 addition & 0 deletions sway-parse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ sway-ast = { version = "0.46.0", path = "../sway-ast" }
sway-error = { version = "0.46.0", path = "../sway-error" }
sway-types = { version = "0.46.0", path = "../sway-types" }
thiserror = "1.0"
unicode-bidi = "0.3.13"
unicode-xid = "0.2.2"

[dev-dependencies]
Expand Down
72 changes: 69 additions & 3 deletions sway-parse/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use sway_types::{
ast::{Delimiter, PunctKind},
Ident, SourceId, Span, Spanned,
};
use unicode_bidi::format_chars::{ALM, FSI, LRE, LRI, LRM, LRO, PDF, PDI, RLE, RLI, RLM, RLO};
use unicode_xid::UnicodeXID;

#[extension_trait]
Expand Down Expand Up @@ -465,14 +466,24 @@ fn lex_string(
},
)
};
let (_, next_character) = l
let (next_index, next_character) = l
.stream
.next()
.ok_or_else(|| unclosed_string_lit(l, l.src.len() - 1))?;
parsed.push(match next_character {
'\\' => parse_escape_code(l)
.map_err(|e| e.unwrap_or_else(|| unclosed_string_lit(l, l.src.len())))?,
'"' => break,
// do not allow text direction codepoints
ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO => {
let kind = LexErrorKind::UnicodeTextDirInLiteral {
position: next_index,
character: next_character,
};
let span = span_one(l, next_index, next_character);
error(l.handler, LexError { span, kind });
continue;
}
_ => next_character,
});
}
Expand Down Expand Up @@ -507,7 +518,17 @@ fn lex_char(
}
};

let (_, next_char) = next(l)?;
let (next_index, next_char) = next(l)?;
// do not allow text direction codepoints
if let ALM | FSI | LRE | LRI | LRM | LRO | PDF | PDI | RLE | RLI | RLM | RLO = next_char {
let kind = LexErrorKind::UnicodeTextDirInLiteral {
position: next_index,
character: next_char,
};
let span = span_one(l, next_index, next_char);
error(l.handler, LexError { span, kind });
}

let parsed = escape(l, next_char)?;

// Consume the closing `'`.
Expand Down Expand Up @@ -817,7 +838,52 @@ mod tests {
TokenTree,
},
};
use sway_error::handler::Handler;
use sway_error::{
error::CompileError,
handler::Handler,
lex_error::{LexError, LexErrorKind},
};

/// Lexes a Sway script whose string and char literals embed Unicode
/// text-direction codepoints and checks that every occurrence is reported
/// as a `LexErrorKind::UnicodeTextDirInLiteral` lex error.
#[test]
fn lex_bidi() {
    // The second string literal carries four direction codepoints (U+202E,
    // U+2066, U+2069, U+2066) and the char literal carries one more (U+202E),
    // which is where the expected total of five errors comes from.
    let source = "
script;
use std::string::String;
fn main() {
let a = String::from_ascii_str(\"fuel\");
let b = String::from_ascii_str(\"fuel\u{202E}\u{2066}// Same string again\u{2069}\u{2066}\");
if a.as_bytes() == b.as_bytes() {
log(\"same\");
} else {
log(\"different\");
}
let lrm = '\u{202E}';
log(lrm);
}
";
    let handler = Handler::default();
    // Lex the whole input with no source path attached. The resulting token
    // stream is not inspected; only the diagnostics collected by the handler
    // are checked below.
    let _tokens = lex_commented(&handler, &Arc::from(source), 0, source.len(), &None).unwrap();

    let (errors, warnings) = handler.consume();
    assert_eq!(warnings.len(), 0);
    assert_eq!(errors.len(), 5);

    // Every reported diagnostic must be the text-direction lex error; the
    // span, position and offending character are not constrained here.
    errors.into_iter().for_each(|reported| {
        assert_matches!(
            reported,
            CompileError::Lex {
                error: LexError {
                    kind: LexErrorKind::UnicodeTextDirInLiteral { .. },
                    ..
                }
            }
        );
    });
}

#[test]
fn lex_commented_token_stream() {
Expand Down
Loading