[red-knot] Report line numbers in mdtest relative to the markdown file, not the test snippet (#13804)

Co-authored-by: Alex Waygood <[email protected]>
Co-authored-by: Micha Reiser <[email protected]>
Co-authored-by: Carl Meyer <[email protected]>
4 people authored Oct 22, 2024
1 parent 9d10279 commit cd6c937
Showing 4 changed files with 134 additions and 57 deletions.
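The heart of the change is simple arithmetic: the matcher reports failures relative to the embedded code block, and the runner now adds the Markdown line of the block's opening backticks, so the printed location (`{title}:{absolute_line_number}`) points into the Markdown file rather than into the snippet. A minimal, self-contained sketch of that mapping, using plain newline counting instead of the crate's `LineIndex`/`OneIndexed` (names here are illustrative, not the repository's API):

```rust
// Illustrative sketch only: mirrors the backtick-offset arithmetic in this commit
// without pulling in `ruff_source_file`.
fn one_based_line(source: &str, offset: usize) -> usize {
    // Count the newlines before `offset`; line numbers are one-based.
    source[..offset].bytes().filter(|&b| b == b'\n').count() + 1
}

fn main() {
    // A tiny Markdown test; the code fence is built dynamically so the example
    // stays self-contained.
    let fence = "`".repeat(3);
    let markdown = format!("# my test\n\n{fence}py\nx: int = \"wrong\"\n{fence}\n");

    // Offset of the opening backticks, as the parser records in `md_offset`.
    let backtick_offset = markdown.find(&fence).unwrap();
    let backtick_line = one_based_line(&markdown, backtick_offset); // 3

    // The matcher reports failures relative to the code block; its first line is 1.
    let relative_line = 1;

    // 3 + 1 = 4: the first snippet line sits directly below the fence.
    let absolute_line = backtick_line + relative_line;
    assert_eq!(absolute_line, 4);
    println!("my_test.md:{absolute_line}");
}
```

In the diff below, the same sum is `backtick_line.checked_add(relative_line_number)`, with the fence offset threaded through from the parser as `EmbeddedFile::md_offset`.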
112 changes: 72 additions & 40 deletions crates/red_knot_test/src/lib.rs
@@ -1,14 +1,13 @@
 use colored::Colorize;
 use parser as test_parser;
 use red_knot_python_semantic::types::check_types;
-use ruff_db::files::{system_path_to_file, Files};
+use ruff_db::files::{system_path_to_file, File, Files};
 use ruff_db::parsed::parsed_module;
 use ruff_db::system::{DbWithTestSystem, SystemPathBuf};
-use std::collections::BTreeMap;
+use ruff_source_file::LineIndex;
+use ruff_text_size::TextSize;
 use std::path::Path;
 
-type Failures = BTreeMap<SystemPathBuf, matcher::FailuresByLine>;
-
 mod assertion;
 mod db;
 mod diagnostic;
@@ -40,61 +39,94 @@ pub fn run(path: &Path, title: &str) {
             any_failures = true;
             println!("\n{}\n", test.name().bold().underline());
 
-            for (path, by_line) in failures {
-                println!("{}", path.as_str().bold());
-                for (line_number, failures) in by_line.iter() {
+            let md_index = LineIndex::from_source_text(&source);
+
+            for test_failures in failures {
+                let backtick_line = md_index.line_index(test_failures.backtick_offset);
+
+                for (relative_line_number, failures) in test_failures.by_line.iter() {
                     for failure in failures {
-                        let line_info = format!("line {line_number}:").cyan();
+                        let absolute_line_number =
+                            backtick_line.checked_add(relative_line_number).unwrap();
+                        let line_info = format!("{title}:{absolute_line_number}").cyan();
                         println!(" {line_info} {failure}");
                     }
                 }
-                println!();
             }
         }
     }
 
-    println!("{}\n", "-".repeat(50));
+    println!("\n{}\n", "-".repeat(50));
 
     assert!(!any_failures, "Some tests failed.");
 }

 fn run_test(db: &mut db::Db, test: &parser::MarkdownTest) -> Result<(), Failures> {
     let workspace_root = db.workspace_root().to_path_buf();
 
-    let mut system_paths = vec![];
-
-    for file in test.files() {
-        assert!(
-            matches!(file.lang, "py" | "pyi"),
-            "Non-Python files not supported yet."
-        );
-        let full_path = workspace_root.join(file.path);
-        db.write_file(&full_path, file.code).unwrap();
-        system_paths.push(full_path);
-    }
-
-    let mut failures = BTreeMap::default();
-
-    for path in system_paths {
-        let file = system_path_to_file(db, path.clone()).unwrap();
-        let parsed = parsed_module(db, file);
-
-        // TODO allow testing against code with syntax errors
-        assert!(
-            parsed.errors().is_empty(),
-            "Python syntax errors in {}, {:?}: {:?}",
-            test.name(),
-            path,
-            parsed.errors()
-        );
+    let test_files: Vec<_> = test
+        .files()
+        .map(|embedded| {
+            assert!(
+                matches!(embedded.lang, "py" | "pyi"),
+                "Non-Python files not supported yet."
+            );
+            let full_path = workspace_root.join(embedded.path);
+            db.write_file(&full_path, embedded.code).unwrap();
+            let file = system_path_to_file(db, full_path).unwrap();
+
+            TestFile {
+                file,
+                backtick_offset: embedded.md_offset,
+            }
+        })
+        .collect();
+
+    let failures: Failures = test_files
+        .into_iter()
+        .filter_map(|test_file| {
+            let parsed = parsed_module(db, test_file.file);
+
+            // TODO allow testing against code with syntax errors
+            assert!(
+                parsed.errors().is_empty(),
+                "Python syntax errors in {}, {}: {:?}",
+                test.name(),
+                test_file.file.path(db),
+                parsed.errors()
+            );
+
+            match matcher::match_file(db, test_file.file, check_types(db, test_file.file)) {
+                Ok(()) => None,
+                Err(line_failures) => Some(FileFailures {
+                    backtick_offset: test_file.backtick_offset,
+                    by_line: line_failures,
+                }),
+            }
+        })
+        .collect();
 
-        matcher::match_file(db, file, check_types(db, file)).unwrap_or_else(|line_failures| {
-            failures.insert(path, line_failures);
-        });
-    }
     if failures.is_empty() {
         Ok(())
     } else {
         Err(failures)
     }
 }
+
+type Failures = Vec<FileFailures>;
+
+/// The failures for a single file in a test by line number.
+struct FileFailures {
+    /// The offset of the backticks that starts the code block in the Markdown file
+    backtick_offset: TextSize,
+    /// The failures by lines in the code block.
+    by_line: matcher::FailuresByLine,
+}
+
+/// File in a test.
+struct TestFile {
+    file: File,
+
+    // Offset of the backticks that starts the code block in the Markdown file
+    backtick_offset: TextSize,
+}
48 changes: 31 additions & 17 deletions crates/red_knot_test/src/parser.rs
@@ -1,8 +1,12 @@
+use std::sync::LazyLock;
+
 use memchr::memchr2;
 use regex::{Captures, Match, Regex};
-use ruff_index::{newtype_index, IndexVec};
 use rustc_hash::{FxHashMap, FxHashSet};
-use std::sync::LazyLock;
+
+use ruff_index::{newtype_index, IndexVec};
+use ruff_python_trivia::Cursor;
+use ruff_text_size::{TextLen, TextSize};
 
 /// Parse the Markdown `source` as a test suite with given `title`.
 pub(crate) fn parse<'s>(title: &'s str, source: &'s str) -> anyhow::Result<MarkdownTestSuite<'s>> {
@@ -132,6 +136,9 @@ pub(crate) struct EmbeddedFile<'s> {
     pub(crate) path: &'s str,
     pub(crate) lang: &'s str,
     pub(crate) code: &'s str,
+
+    /// The offset of the backticks beginning the code block within the markdown file
+    pub(crate) md_offset: TextSize,
 }
 
 /// Matches a sequence of `#` characters, followed by a title heading, followed by a newline.
@@ -185,7 +192,9 @@ struct Parser<'s> {
     files: IndexVec<EmbeddedFileId, EmbeddedFile<'s>>,
 
     /// The unparsed remainder of the Markdown source.
-    unparsed: &'s str,
+    cursor: Cursor<'s>,
+
+    source_len: TextSize,
 
     /// Stack of ancestor sections.
     stack: SectionStack,
@@ -205,7 +214,8 @@ impl<'s> Parser<'s> {
         Self {
             sections,
             files: IndexVec::default(),
-            unparsed: source,
+            cursor: Cursor::new(source),
+            source_len: source.text_len(),
             stack: SectionStack::new(root_section_id),
             current_section_files: None,
         }
@@ -227,26 +237,23 @@ impl<'s> Parser<'s> {
     }
 
     fn parse_impl(&mut self) -> anyhow::Result<()> {
-        while let Some(position) = memchr2(b'`', b'#', self.unparsed.as_bytes()) {
-            let (before, after) = self.unparsed.split_at(position);
-            self.unparsed = after;
+        while let Some(position) = memchr2(b'`', b'#', self.cursor.as_bytes()) {
+            self.cursor.skip_bytes(position.saturating_sub(1));
 
             // code blocks and headers must start on a new line.
-            if before.is_empty() || before.ends_with('\n') {
-                let c = after.as_bytes()[0] as char;
-
-                match c {
+            if position == 0 || self.cursor.eat_char('\n') {
+                match self.cursor.first() {
                     '#' => {
-                        if let Some(find) = HEADER_RE.find(self.unparsed) {
+                        if let Some(find) = HEADER_RE.find(self.cursor.as_str()) {
                             self.parse_header(find.as_str())?;
-                            self.unparsed = &self.unparsed[find.end()..];
+                            self.cursor.skip_bytes(find.len());
                             continue;
                         }
                     }
                     '`' => {
-                        if let Some(captures) = CODE_RE.captures(self.unparsed) {
+                        if let Some(captures) = CODE_RE.captures(self.cursor.as_str()) {
                             self.parse_code_block(&captures)?;
-                            self.unparsed = &self.unparsed[captures.get(0).unwrap().end()..];
+                            self.cursor.skip_bytes(captures.get(0).unwrap().len());
                             continue;
                         }
                     }
@@ -255,8 +262,8 @@ impl<'s> Parser<'s> {
             }
 
             // Skip to the end of the line
-            if let Some(position) = memchr::memchr(b'\n', self.unparsed.as_bytes()) {
-                self.unparsed = &self.unparsed[position + 1..];
+            if let Some(position) = memchr::memchr(b'\n', self.cursor.as_bytes()) {
+                self.cursor.skip_bytes(position);
             } else {
                 break;
             }
@@ -336,6 +343,8 @@ impl<'s> Parser<'s> {
                 .unwrap_or_default(),
             // CODE_RE can't match without matches for 'lang' and 'code'.
             code: captures.name("code").unwrap().into(),
+
+            md_offset: self.offset(),
         });
 
         if let Some(current_files) = &mut self.current_section_files {
@@ -368,6 +377,11 @@ impl<'s> Parser<'s> {
             self.current_section_files = None;
         }
     }
+
+    /// Retrieves the current offset of the cursor within the source code.
+    fn offset(&self) -> TextSize {
+        self.source_len - self.cursor.text_len()
+    }
 }
 
 #[cfg(test)]
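With the rewrite, the parser walks the Markdown source with a `Cursor` plus the total source length, and `offset()` recovers the absolute position as `source_len - cursor.text_len()`: whatever is no longer remaining has been consumed. A rough standalone illustration of that invariant, with `std::str::Chars` standing in for `ruff_python_trivia::Cursor` (the `MiniCursor` name and shape are assumptions for the sketch, not the crate's type):

```rust
use std::str::Chars;

// Sketch of the `offset = total length - remaining length` invariant behind
// `Parser::offset`; `Chars` plays the role of the crate's `Cursor`.
struct MiniCursor<'s> {
    chars: Chars<'s>,
    source_len: usize,
}

impl<'s> MiniCursor<'s> {
    fn new(source: &'s str) -> Self {
        Self {
            chars: source.chars(),
            source_len: source.len(),
        }
    }

    /// Byte offset of the next unconsumed character.
    fn offset(&self) -> usize {
        self.source_len - self.chars.as_str().len()
    }
}

fn main() {
    let mut cursor = MiniCursor::new("# title\ncode");
    assert_eq!(cursor.offset(), 0);

    // Consume "# title\n" (8 bytes); the offset tracks the consumed prefix.
    for _ in 0..8 {
        cursor.chars.next();
    }
    assert_eq!(cursor.offset(), 8);
}
```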
19 changes: 19 additions & 0 deletions crates/ruff_python_trivia/src/cursor.rs
@@ -26,6 +26,16 @@ impl<'a> Cursor<'a> {
         self.chars.clone()
     }
 
+    /// Returns the remaining input as byte slice.
+    pub fn as_bytes(&self) -> &'a [u8] {
+        self.as_str().as_bytes()
+    }
+
+    /// Returns the remaining input as string slice.
+    pub fn as_str(&self) -> &'a str {
+        self.chars.as_str()
+    }
+
     /// Peeks the next character from the input stream without consuming it.
     /// Returns [`EOF_CHAR`] if the file is at the end of the file.
     pub fn first(&self) -> char {
@@ -110,4 +120,13 @@ impl<'a> Cursor<'a> {
             self.bump_back();
         }
     }
+
+    /// Skips the next `count` bytes.
+    ///
+    /// ## Panics
+    /// - If `count` is larger than the remaining bytes in the input stream.
+    /// - If `count` indexes into a multi-byte character.
+    pub fn skip_bytes(&mut self, count: usize) {
+        self.chars = self.chars.as_str()[count..].chars();
+    }
 }
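`skip_bytes` advances the cursor by reslicing the remaining input and rebuilding the `Chars` iterator, which is exactly where the documented panics come from: an out-of-range `count`, or one that lands inside a multi-byte character, trips `str` slicing's checks. A tiny standalone sketch of the same trick (illustrative, not the `ruff_python_trivia` implementation):

```rust
// Sketch of the reslicing behind `Cursor::skip_bytes`: advance a `Chars`
// iterator by slicing its remaining `&str` and rebuilding it.
fn skip_bytes(chars: std::str::Chars<'_>, count: usize) -> std::str::Chars<'_> {
    // Panics if `count` exceeds the remaining length or splits a multi-byte
    // character, just like indexing a `&str`.
    chars.as_str()[count..].chars()
}

fn main() {
    let mut chars = "héllo".chars();
    chars = skip_bytes(chars, 3); // 'h' is 1 byte, 'é' is 2 bytes
    assert_eq!(chars.as_str(), "llo");

    // `skip_bytes("héllo".chars(), 2)` would panic: byte 2 falls inside 'é'.
}
```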
12 changes: 12 additions & 0 deletions crates/ruff_source_file/src/line_index.rs
@@ -394,6 +394,18 @@ impl OneIndexed {
             None => Self::MIN,
         }
     }
+
+    /// Checked addition. Returns `None` if overflow occurred.
+    #[must_use]
+    pub fn checked_add(self, rhs: Self) -> Option<Self> {
+        self.0.checked_add(rhs.0.get()).map(Self)
+    }
+
+    /// Checked subtraction. Returns `None` if overflow occurred.
+    #[must_use]
+    pub fn checked_sub(self, rhs: Self) -> Option<Self> {
+        self.0.get().checked_sub(rhs.get()).and_then(Self::new)
+    }
 }
 
 impl fmt::Display for OneIndexed {
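`checked_add` sums the raw one-based values, which is precisely the sum the test runner needs: if the opening backticks sit on Markdown line 10 and a failure is on snippet line 1, the reported line is 11, the first line inside the block. A small sketch of the same semantics on `NonZeroUsize`, standing in for `OneIndexed` (assumed stand-in, not the crate type):

```rust
use std::num::NonZeroUsize;

// Sketch of `OneIndexed::checked_add`: add the raw one-based values and return
// `None` on overflow instead of panicking.
fn checked_add(lhs: NonZeroUsize, rhs: NonZeroUsize) -> Option<NonZeroUsize> {
    lhs.checked_add(rhs.get())
}

fn main() {
    let backtick_line = NonZeroUsize::new(10).unwrap(); // opening fence on Markdown line 10
    let relative_line = NonZeroUsize::new(1).unwrap(); // first line of the snippet

    // 10 + 1 = 11: the first snippet line is the line right after the fence.
    assert_eq!(checked_add(backtick_line, relative_line), NonZeroUsize::new(11));
}
```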
