Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

line:col positions in parser #8203

Merged
merged 8 commits into from
Nov 8, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions app/gui2/parser-codegen/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ const RENAME = new Map([
// Rename source references to reflect our usage:
// - In `Tree`s:
['spanLeftOffsetCodeOffsetUtf16', 'whitespaceStartInCodeParsed'],
['spanLeftOffsetCodeUtf16', 'whitespaceLengthInCodeParsed'],
['spanLeftOffsetCodeLenUtf16', 'whitespaceLengthInCodeParsed'],
['spanCodeLengthUtf16', 'childrenLengthInCodeParsed'],
// - In `Token`s:
['leftOffsetCodeOffsetUtf16', 'whitespaceStartInCodeBuffer'],
['leftOffsetCodeUtf16', 'whitespaceLengthInCodeBuffer'],
['codeUtf16', 'lengthInCodeBuffer'],
['leftOffsetCodeLenUtf16', 'whitespaceLengthInCodeBuffer'],
['codeLenUtf16', 'lengthInCodeBuffer'],
['codeOffsetUtf16', 'startInCodeBuffer'],
])

Expand Down
2 changes: 1 addition & 1 deletion lib/rust/parser/debug/src/bin/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,5 @@ pub fn main() {
use std::io::Read;
let mut input = String::new();
std::io::stdin().read_to_string(&mut input).unwrap();
println!("{:#?}", enso_parser::lexer::run(&input));
println!("{:#?}", enso_parser::lexer::debug::lex_and_validate_spans(&input));
}
39 changes: 30 additions & 9 deletions lib/rust/parser/debug/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
#![warn(unused_qualifications)]

use enso_metamodel_lexpr::ToSExpr;
use enso_parser::source::code::debug::LocationCheck;
use enso_reflect::Reflect;
use lexpr::Value;
use std::collections::HashSet;



kazcw marked this conversation as resolved.
Show resolved Hide resolved
// =====================
// === S-expressions ===
// =====================
Expand Down Expand Up @@ -122,10 +122,18 @@ fn strip_hidden_fields(tree: Value) -> Value {
":spanLeftOffsetVisible",
":spanLeftOffsetCodeReprBegin",
":spanLeftOffsetCodeReprLen",
":spanLeftOffsetCodeUtf16",
":spanLeftOffsetCodeLenUtf8",
":spanLeftOffsetCodeLenUtf16",
":spanLeftOffsetCodeLenNewlines",
":spanLeftOffsetCodeLenLineChars",
":spanLeftOffsetCodeOffsetUtf8",
":spanLeftOffsetCodeOffsetUtf16",
":spanLeftOffsetCodeOffsetLine",
":spanLeftOffsetCodeOffsetCol",
":spanCodeLengthUtf8",
":spanCodeLengthUtf16",
":spanCodeLengthNewlines",
":spanCodeLengthLineChars",
];
let hidden_tree_fields: HashSet<_> = hidden_tree_fields.into_iter().collect();
Value::list(tree.to_vec().unwrap().into_iter().filter(|val| match val {
Expand Down Expand Up @@ -194,7 +202,11 @@ fn tuplify(value: Value) -> Value {

/// Check the internal consistency of the `Tree` and `Token` spans from the given root, and validate
/// that every character in the given range is covered exactly once in the token spans.
pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std::ops::Range<u32>) {
pub fn validate_spans(
tree: &enso_parser::syntax::tree::Tree,
expected_span: std::ops::Range<u32>,
locations: &mut LocationCheck,
) {
let mut sum_span = None;
fn concat<T: PartialEq + std::fmt::Debug + Copy>(
a: &Option<std::ops::Range<T>>,
Expand All @@ -208,24 +220,33 @@ pub fn validate_spans(tree: &enso_parser::syntax::tree::Tree, expected_span: std
None => b.clone(),
}
}
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &tree.span.left_offset.code.range()));
tree.visit_items(|item| match item {
enso_parser::syntax::item::Ref::Token(token) => {
if !(token.left_offset.is_empty() && token.code.is_empty()) {
sum_span = Some(concat(&sum_span, &token.left_offset.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.code.range_utf16()));
sum_span = Some(concat(&sum_span, &token.left_offset.code.range()));
sum_span = Some(concat(&sum_span, &token.code.range()));
}
let left_offset = token.left_offset.code.range();
let code = token.code.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
enso_parser::syntax::item::Ref::Tree(tree) => {
let children_span =
concat(&Some(tree.span.left_offset.code.range_utf16()), &tree.span.range_utf16());
validate_spans(tree, children_span.clone());
concat(&Some(tree.span.left_offset.code.range()), &tree.span.range());
let children_span_ = children_span.start.utf16..children_span.end.utf16;
validate_spans(tree, children_span_, locations);
sum_span = Some(concat(&sum_span, &children_span));
let left_offset = tree.span.left_offset.code.range();
let code = tree.span.range();
locations.extend(&[left_offset.start, left_offset.end, code.start, code.end]);
}
});
if expected_span.is_empty() {
assert!(sum_span.map_or(true, |range| range.is_empty()));
} else {
assert_eq!(sum_span.unwrap(), expected_span);
let sum_span = sum_span.unwrap_or_default();
let sum_span = sum_span.start.utf16..sum_span.end.utf16;
assert_eq!(sum_span, expected_span);
}
}
4 changes: 3 additions & 1 deletion lib/rust/parser/debug/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ fn check_file(path: &str, mut code: &str) {
}
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
for (parsed, original) in ast.code().lines().zip(code.lines()) {
assert_eq!(parsed, original, "Bug: dropped tokens, while parsing: {path}");
}
locations.check(code);
let s_expr = enso_parser_debug::to_s_expr(&ast, code);
println!("{s_expr}");
}
47 changes: 23 additions & 24 deletions lib/rust/parser/debug/tests/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,13 @@ fn dot_operator_blocks() {

#[test]
fn code_block_argument_list() {
#[rustfmt::skip]
let code = [
"foo",
" bar",
];
test!(&code.join("\n"), (ArgumentBlockApplication (Ident foo) #((Ident bar))));

#[rustfmt::skip]
let code = [
"value = foo",
Expand All @@ -492,7 +499,6 @@ fn code_block_argument_list() {
];
test(&code.join("\n"), expect);


#[rustfmt::skip]
let code = [
"value = foo",
Expand Down Expand Up @@ -1012,28 +1018,19 @@ x"#;

#[test]
fn interpolated_literals_in_inline_text() {
#[rustfmt::skip]
let cases = [
(r#"'Simple case.'"#, block![(TextLiteral #((Section "Simple case.")))]),
(r#"'With a `splice`.'"#, block![(TextLiteral
#((Section "With a ")
(Splice (Ident splice))
(Section ".")))]),
(r#"'` SpliceWithLeadingWhitespace`'"#, block![(TextLiteral
#((Splice (Ident SpliceWithLeadingWhitespace))))]),
(r#"'String with \n escape'"#, block![
(TextLiteral
#((Section "String with ") (Escape '\n') (Section " escape")))]),
(r#"'\x0Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\u000Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\u{0000A}escape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
(r#"'\U0000000Aescape'"#, block![
(TextLiteral #((Escape '\n') (Section "escape")))]),
];
cases.into_iter().for_each(|(code, expected)| test(code, expected));
test!(r#"'Simple case.'"#, (TextLiteral #((Section "Simple case."))));
test!(r#"'With a `splice`.'"#, (TextLiteral
#((Section "With a ")
(Splice (Ident splice))
(Section "."))));
test!(r#"'` SpliceWithLeadingWhitespace`'"#,
(TextLiteral #((Splice (Ident SpliceWithLeadingWhitespace)))));
test!(r#"'String with \n escape'"#,
(TextLiteral #((Section "String with ") (Escape '\n') (Section " escape"))));
test!(r#"'\x0Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\u000Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\u{0000A}escape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
test!(r#"'\U0000000Aescape'"#, (TextLiteral #((Escape '\n') (Section "escape"))));
}

#[test]
Expand Down Expand Up @@ -1580,7 +1577,9 @@ fn test(code: &str, expect: lexpr::Value) {
fn parse(code: &str) -> enso_parser::syntax::tree::Tree {
let ast = enso_parser::Parser::new().run(code);
let expected_span = 0..(code.encode_utf16().count() as u32);
enso_parser_debug::validate_spans(&ast, expected_span);
let mut locations = enso_parser::source::code::debug::LocationCheck::new();
enso_parser_debug::validate_spans(&ast, expected_span, &mut locations);
locations.check(code);
ast
}

Expand Down
7 changes: 6 additions & 1 deletion lib/rust/parser/debug/tools/parse_all_enso_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
set -e

cargo build -p enso-parser-debug --bin enso-parser-debug
cargo build -p enso-parser-debug --bin lexer

ENSO_FILES=$(find distribution/ test/ -name '*.enso' -print | sort)
for x in $ENSO_FILES; do echo -n "$x "; target/rust/debug/enso-parser-debug <$x; done
for x in $ENSO_FILES; do
echo -n "$x "
target/rust/debug/lexer <$x >/dev/null
target/rust/debug/enso-parser-debug <$x
done

9 changes: 6 additions & 3 deletions lib/rust/parser/generate-java/src/serialization.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ const CODE_GETTER: &str = "codeRepr";
const WHITESPACE_GETTER: &str = "getWhitespace";
const TREE_BEGIN: &str = "fieldSpanLeftOffsetCodeReprBegin";
const TREE_LEN: &str = "fieldSpanLeftOffsetCodeReprLen";
const TREE_WHITESPACE: &str = "fieldSpanLeftOffsetCodeLenUtf16";
const TOKEN_WHITESPACE: &str = "fieldLeftOffsetCodeLenUtf16";
const TOKEN_CODE_LENGTH: &str = "fieldCodeLenUtf16";

/// Derive deserialization for all types in the typegraph.
pub fn derive(graph: &mut TypeGraph, tree: ClassId, token: ClassId) {
Expand Down Expand Up @@ -151,16 +154,16 @@ fn start_whitespace() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'sta
|MaterializerInput { message }| format!("{message}.position()")
}
fn start_code_tree() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldSpanLeftOffsetCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TREE_WHITESPACE})")
}
fn end_code_tree() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.position()")
}
fn start_code_token() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldLeftOffsetCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TOKEN_WHITESPACE})")
}
fn end_code_token() -> impl for<'a> Fn(MaterializerInput<'a>) -> String + 'static {
|MaterializerInput { message }| format!("{message}.advance(fieldCodeUtf16)")
|MaterializerInput { message }| format!("{message}.advance({TOKEN_CODE_LENGTH})")
}


Expand Down
Loading
Loading