Skip to content

Commit

Permalink
line:col positions in parser
Browse files Browse the repository at this point in the history
  • Loading branch information
kazcw committed Nov 1, 2023
1 parent 8bc17bd commit 5acbcd2
Show file tree
Hide file tree
Showing 13 changed files with 446 additions and 257 deletions.
6 changes: 3 additions & 3 deletions app/gui2/parser-codegen/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ const RENAME = new Map([
// Rename source references to reflect our usage:
// - In `Tree`s:
['spanLeftOffsetCodeOffsetUtf16', 'whitespaceStartInCodeParsed'],
['spanLeftOffsetCodeUtf16', 'whitespaceLengthInCodeParsed'],
['spanLeftOffsetCodeLenUtf16', 'whitespaceLengthInCodeParsed'],
['spanCodeLengthUtf16', 'childrenLengthInCodeParsed'],
// - In `Token`s:
['leftOffsetCodeOffsetUtf16', 'whitespaceStartInCodeBuffer'],
['leftOffsetCodeUtf16', 'whitespaceLengthInCodeBuffer'],
['codeUtf16', 'lengthInCodeBuffer'],
['leftOffsetCodeLenUtf16', 'whitespaceLengthInCodeBuffer'],
['codeLenUtf16', 'lengthInCodeBuffer'],
['codeOffsetUtf16', 'startInCodeBuffer'],
])

Expand Down
2 changes: 1 addition & 1 deletion lib/rust/parser/debug/src/bin/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ pub fn main() {
use std::io::Read;
let mut input = String::new();
std::io::stdin().read_to_string(&mut input).unwrap();
println!("{:#?}", enso_parser::lexer::run(&input));
println!("{:#?}", enso_parser::lexer::debug::lex_and_validate_spans(&input));
}
39 changes: 30 additions & 9 deletions lib/rust/parser/debug/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
#![warn(unused_qualifications)]

use enso_metamodel_lexpr::ToSExpr;
use enso_parser::source::code::debug::LocationCheck;
use enso_reflect::Reflect;
use lexpr::Value;
use std::collections::HashSet;



// =====================
// === S-expressions ===
// =====================
Expand Down Expand Up @@ -122,10 +122,18 @@ fn strip_hidden_fields(tree: Value) -> Value {
":spanLeftOffsetVisible",
":spanLeftOffsetCodeReprBegin",
":spanLeftOffsetCodeReprLen",
":spanLeftOffsetCodeUtf16",
":spanLeftOffsetCodeLenUtf8",
":spanLeftOffsetCodeLenUtf16",
":spanLeftOffsetCodeLenNewlines",
":spanLeftOffsetCodeLenLineChars",
":spanLeftOffsetCodeOffsetUtf8",
":spanLeftOffsetCodeOffsetUtf16",
":spanLeftOffsetCodeOffsetLine",
":spanLeftOffsetCodeOffsetCol",
":spanCodeLengthUtf8",
":spanCodeLengthUtf16",
":spanCodeLengthNewlines",
":spanCodeLengthLineChars",
];
let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
Expand Down Expand Up @@ -194,7 +202,11 @@ fn tuplify(value: Value) -> Value {

/// Check the internal consistency of the `Tree` and `Token` spans from the given root, and validate
/// that every character in the given range is covered exactly once in the token spans.
pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std::ops::Range<u32>) {
pub fn validate_spans(
tree: &enso_parser::syntax::tree::Tree,
expected_span: std::ops::Range<u32>,
locations: &mut LocationCheck,
) {
let mut sum_span = None;
fn concat<T: PartialEq + std::fmt::Debug + Copy>(
a: &Option<std::ops::Range<T>>,
Expand All @@ -208,24 +220,33 @@ pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std
None => b.clone(),
}
}
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range()));
tree.visit_items(|item| match item {
enso_parser::syntax::item::Ref::Token(token) => {
if !(token.left_offset.is_empty() && token.code.is_empty()) {
sum_span = Some(concat(&sum_span, &token.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.left_offset.code.range()));
sum_span = Some(concat(&sum_span, &token.code.range()));
}
let left_offset = token.left_offset.code.range();
let code = token.code.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
enso_parser::syntax::item::Ref::Tree(tree) => {
let children_span =
concat(&Some(tree.span.left_offset.code.range_utf16()), &tree.span.range_utf16());
validate_spans(tree, children_span.clone());
concat(&Some(tree.span.left_offset.code.range()), &tree.span.range());
let children_span_ = children_span.start.utf16..children_span.end.utf16;
validate_spans(tree, children_span_, locations);
sum_span = Some(concat(&sum_span, &children_span));
let left_offset = tree.span.left_offset.code.range();
let code = tree.span.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
});
if expected_span.is_empty() {
assert!(sum_span.map_or(true, |range| range.is_empty()));
} else {
assert_eq!(sum_span.unwrap(), expected_span);
let sum_span = sum_span.unwrap_or_default();
let sum_span = sum_span.start.utf16..sum_span.end.utf16;
assert_eq!(sum_span, expected_span);
}
}
4 changes: 3 additions & 1 deletion lib/rust/parser/debug/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ fn check_file(path: &str, mut code: &str) {
}
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
for (parsed, original) in ast.code().lines().zip(code.lines()) {
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {path}");
}
locations.check(code);
let s_expr = enso_parser_debug::to_s_expr(&ast, code);
println!("{s_expr}");
}
47 changes: 23 additions & 24 deletions lib/rust/parser/debug/tests/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,13 @@ fn dot_operator_blocks() {

#[test]
fn code_block_argument_list() {
#[rustfmt::skip]
let code = [
"foo",
" bar",
];
test!(&code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar))));

#[rustfmt::skip]
let code = [
"value = foo",
Expand All @@ -492,7 +499,6 @@ fn code_block_argument_list() {
];
test(&code.join("\n"), expect);


#[rustfmt::skip]
let code = [
"value = foo",
Expand Down Expand Up @@ -1012,28 +1018,19 @@ x"#;

#[test]
fn interpolated_literals_in_inline_text() {
#[rustfmt::skip]
let cases = [
(r#"'Simple case.'"#, block![(TextLiteral #((Section "Simple case.")))]),
(r#"'With a `splice`.'"#, block![(TextLiteral
#((Section "With a ")
(Splice (Ident splice))
(Section ".")))]),
(r#"'` SpliceWithLeadingWhitespace`'"#, block![(TextLiteral
#((Splice (Ident SpliceWithLeadingWhitespace))))]),
(r#"'String with \n escape'"#, block![
(TextLiteral
#((Section "String with ") (Escape '\n') (Section " escape")))]),
(r#"'\x0Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\u000Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\u{0000A}escape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\U0000000Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
test!(r#"'Simple case.'"#, (TextLiteral #((Section "Simple case."))));
test!(r#"'With a `splice`.'"#, (TextLiteral
#((Section "With a ")
(Splice (Ident splice))
(Section "."))));
test!(r#"'` SpliceWithLeadingWhitespace`'"#,
(TextLiteral #((Splice (Ident SpliceWithLeadingWhitespace)))));
test!(r#"'String with \n escape'"#,
(TextLiteral #((Section "String with ") (Escape '\n') (Section " escape"))));
test!(r#"'\x0Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\u000Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\u{0000A}escape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\U0000000Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
}

#[test]
Expand Down Expand Up @@ -1580,7 +1577,9 @@ fn test(code: &str, expect: lexpr::Value) {
fn parse(code: &str) -> enso_parser::syntax::tree::Tree {
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
locations.check(code);
ast
}

Expand Down
7 changes: 6 additions & 1 deletion lib/rust/parser/debug/tools/parse_all_enso_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
set -e

cargo build -p enso-parser-debug --bin enso-parser-debug
cargo build -p enso-parser-debug --bin lexer

ENSO_FILES=$(find distribution/ test/ -name '*.enso' -print | sort)
for x in $ENSO_FILES; do echo -n "$x "; target/rust/debug/enso-parser-debug <$x; done
for x in $ENSO_FILES; do
echo -n "$x "
target/rust/debug/lexer <$x >/dev/null
target/rust/debug/enso-parser-debug <$x
done

9 changes: 6 additions & 3 deletions lib/rust/parser/generate-java/src/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ const CODE_GETTER: &str = "codeRepr";
const WHITESPACE_GETTER: &str = "getWhitespace";
const TREE_BEGIN: &str = "fieldSpanLeftOffsetCodeReprBegin";
const TREE_LEN: &str = "fieldSpanLeftOffsetCodeReprLen";
const TREE_WHITESPACE: &str = "fieldSpanLeftOffsetCodeLenUtf16";
const TOKEN_WHITESPACE: &str = "fieldLeftOffsetCodeLenUtf16";
const TOKEN_CODE_LENGTH: &str = "fieldCodeLenUtf16";

/// Derive deserialization for all types in the typegraph.
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
Expand Down Expand Up @@ -151,16 +154,16 @@ fn start_whitespace() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'sta
|MaterializerInput { message }| format!("{message}.position()")
}
fn start_code_tree() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldSpanLeftOffsetCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TREE_WHITESPACE})")
}
fn end_code_tree() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.position()")
}
fn start_code_token() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldLeftOffsetCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TOKEN_WHITESPACE})")
}
fn end_code_token() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TOKEN_CODE_LENGTH})")
}


Expand Down
Loading

0 comments on commit 5acbcd2

Please sign in to comment.