Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimise tree sitter usage #31

Merged
merged 4 commits into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 103 additions & 40 deletions src/check.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::cli::CheckArgs;
use crate::rules::{default_ruleset, rulemap, RuleBox, RuleSet};
use crate::rules::{default_ruleset, entrypoint_map, EntryPointMap, RuleSet};
use crate::settings::Settings;
use crate::violation;
use crate::{Category, Code, Diagnostic, Method, Violation};
Expand All @@ -8,6 +8,7 @@ use colored::Colorize;
use itertools::{chain, join};
use std::fs::read_to_string;
use std::path::{Path, PathBuf};
use tree_sitter::{Node, Parser};
use walkdir::WalkDir;

/// Get the list of active rules for this session.
Expand Down Expand Up @@ -59,32 +60,84 @@ fn get_files(files_in: &Vec<PathBuf>) -> Vec<PathBuf> {
paths
}

fn tree_rules(
entrypoints: &EntryPointMap,
node: &Node,
src: &str,
) -> anyhow::Result<Vec<(String, Violation)>> {
let mut violations = Vec::new();
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
let empty = vec![];
let rules = entrypoints.get(child.kind()).unwrap_or(&empty);
for (code, rule) in rules {
match rule.method() {
Method::Tree(f) => {
if let Some(violation) = f(&child, src) {
violations.push((code.clone(), violation));
}
}
_ => {
anyhow::bail!("Non-Tree rule {} has incorrect entrypoints", code);
}
}
}
violations.extend(tree_rules(entrypoints, &child, src)?);
}
Ok(violations)
}

/// Parse a file, check it for issues, and return the report.
fn check_file(rule: &RuleBox, path: &Path, settings: &Settings) -> anyhow::Result<Vec<Violation>> {
fn check_file(
entrypoints: &EntryPointMap,
path: &Path,
parser: &mut Parser,
settings: &Settings,
) -> anyhow::Result<Vec<(String, Violation)>> {
let mut violations = Vec::new();

// Check path itself
let empty_path_rules = vec![];
let path_rules = entrypoints.get("PATH").unwrap_or(&empty_path_rules);
for (code, rule) in path_rules {
match rule.method() {
Method::Path(f) => {
if let Some(violation) = f(path) {
violations.push((code.clone(), violation));
}
}
_ => {
anyhow::bail!("Non-Path rule {} has incorrect entrypoints", code);
}
}
}

// Perform plain text analysis
let empty_text_rules = vec![];
let text_rules = entrypoints.get("TEXT").unwrap_or(&empty_text_rules);
let source = read_to_string(path)?;
let mut parser = tree_sitter::Parser::new();
parser
.set_language(&tree_sitter_fortran::language())
.expect("Error loading Fortran grammar");
for (code, rule) in text_rules {
match rule.method() {
Method::Text(f) => {
violations.extend(
f(&source, settings)
.iter()
.map(|x| (code.clone(), x.clone())),
);
}
_ => {
anyhow::bail!("Non-Text rule {} has incorrect entrypoints", code);
}
}
}

// Perform concrete syntax tree analysis
let tree = parser
.parse(&source, None)
.context("Could not parse file")?;
let root = tree.root_node();
violations.extend(tree_rules(entrypoints, &root, &source)?);

let mut violations = Vec::new();
match rule.method() {
Method::Path(f) => {
if let Some(violation) = f(path) {
violations.push(violation);
}
}
Method::Tree(f) => {
violations.extend(f(&root, &source));
}
Method::Text(f) => {
violations.extend(f(&source, settings));
}
}
Ok(violations)
}

Expand All @@ -94,36 +147,46 @@ pub fn check(args: CheckArgs) -> i32 {
line_length: args.line_length,
};
let ruleset = get_ruleset(&args);
match rulemap(&ruleset) {
Ok(rules) => {
let mut diagnostics = Vec::new();
let mut parser = Parser::new();
parser
.set_language(&tree_sitter_fortran::language())
.expect("Error loading Fortran grammar");
match entrypoint_map(&ruleset) {
Ok(entrypoints) => {
let mut total_errors = 0;
for file in get_files(&args.files) {
for (code, rule) in &rules {
match check_file(rule, &file, &settings) {
Ok(violations) => {
diagnostics
.extend(violations.iter().map(|x| Diagnostic::new(&file, code, x)));
}
Err(msg) => {
let err_code = Code::new(Category::Error, 0).to_string();
let err_msg = format!("Failed to process: {}", msg);
let violation = violation!(&err_msg);
diagnostics.push(Diagnostic::new(&file, &err_code, &violation));
}
match check_file(&entrypoints, &file, &mut parser, &settings) {
Ok(violations) => {
let mut diagnostics: Vec<Diagnostic> = violations
.into_iter()
.map(|(c, v)| Diagnostic::new(&file, c, &v))
.collect();
diagnostics.sort_unstable();
println!("{}", join(&diagnostics, "\n"));
total_errors += diagnostics.len();
}
Err(msg) => {
let err_code = Code::new(Category::Error, 0).to_string();
let err_msg = format!("Failed to process: {}", msg);
let violation = violation!(&err_msg);
let diagnostic = Diagnostic::new(&file, &err_code, &violation);
println!("{}", diagnostic);
total_errors += 1;
}
}
}
if diagnostics.is_empty() {
if total_errors == 0 {
0
} else {
diagnostics.sort_unstable();
println!("{}", join(&diagnostics, "\n"));
println!("Number of errors: {}", diagnostics.len());
let err_no = format!("Number of errors: {}", total_errors.to_string().bold());
let info = "For more information, run:";
let explain = format!("{} {}", "fortitude explain", "[ERROR_CODES]".bold());
println!("\n-- {}\n-- {}\n\n {}\n", err_no, info, explain);
1
}
}
Err(msg) => {
eprintln!("{}: {}", "Error:".bright_red(), msg);
eprintln!("{}: {}", "INTERNAL ERROR:".bright_red(), msg);
1
}
}
Expand Down
7 changes: 6 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ pub enum Method {
/// Methods that work on just the path name of the file.
Path(fn(&Path) -> Option<Violation>),
/// Methods that analyse the syntax tree.
Tree(fn(&tree_sitter::Node, &str) -> Vec<Violation>),
Tree(fn(&tree_sitter::Node, &str) -> Option<Violation>),
/// Methods that analyse lines of code directly, using regex or otherwise.
Text(fn(&str, &Settings) -> Vec<Violation>),
}
Expand All @@ -176,6 +176,11 @@ pub trait Rule {
/// Return text explaining what the rule tests for, why this is important, and how
/// the user might fix it.
fn explain(&self) -> &str;

/// Return list of tree-sitter node types on which a rule should trigger.
/// Path-based rules should return a vector containing only "PATH" while
/// text-based rules should return a vector containing only "TEXT".
fn entrypoints(&self) -> Vec<&str>;
}

// Violation diagnostics
Expand Down
16 changes: 6 additions & 10 deletions src/rules/error/syntax_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,8 @@ use tree_sitter::Node;

/// Rules that check for syntax errors.

fn syntax_error(node: &Node, _src: &str) -> Vec<Violation> {
let mut violations = Vec::new();
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
if child.is_error() {
violations.push(Violation::from_node("syntax error", &child));
}
violations.extend(syntax_error(&child, _src));
}
violations
fn syntax_error(node: &Node, _src: &str) -> Option<Violation> {
Some(Violation::from_node("syntax_error", node))
}

pub struct SyntaxError {}
Expand All @@ -33,4 +25,8 @@ impl Rule for SyntaxError {
bug in our code or in our parser!
"
}

fn entrypoints(&self) -> Vec<&str> {
vec!["ERROR"]
}
}
4 changes: 4 additions & 0 deletions src/rules/filesystem/extensions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ impl Rule for NonStandardFileExtension {
by some compilers and build tools.
"
}

fn entrypoints(&self) -> Vec<&str> {
vec!["PATH"]
}
}

#[cfg(test)]
Expand Down
30 changes: 27 additions & 3 deletions src/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@ mod modules;
mod style;
mod typing;
use crate::{Category, Code, Rule};
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet};
/// A collection of all rules, and utilities to select a subset at runtime.

pub type RuleBox = Box<dyn Rule>;
pub type RuleSet = HashSet<String>;
pub type RuleMap = HashMap<String, RuleBox>;
pub type RuleSet = BTreeSet<String>;
pub type RuleMap = BTreeMap<String, RuleBox>;
pub type EntryPointMap = BTreeMap<String, Vec<(String, RuleBox)>>;

/// Create a new `Rule` given a rule code, expressed as a string.
pub fn build_rule(code_str: &str) -> anyhow::Result<RuleBox> {
Expand Down Expand Up @@ -107,3 +108,26 @@ pub fn rulemap(set: &RuleSet) -> anyhow::Result<RuleMap> {
}
Ok(rules)
}

/// Map tree-sitter node types to the rules that operate over them
pub fn entrypoint_map(set: &RuleSet) -> anyhow::Result<EntryPointMap> {
let mut map = EntryPointMap::new();
for code in set {
let rule = build_rule(code)?;
let entrypoints = rule.entrypoints();
for entrypoint in entrypoints {
match map.get_mut(entrypoint) {
Some(rule_vec) => {
rule_vec.push((code.to_string(), build_rule(code)?));
}
None => {
map.insert(
entrypoint.to_string(),
vec![(code.to_string(), build_rule(code)?)],
);
}
}
}
}
Ok(map)
}
31 changes: 11 additions & 20 deletions src/rules/modules/external_functions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use tree_sitter::Node;
/// Defines rules that check whether functions and subroutines are defined within modules (or one
/// of a few acceptable alternatives).

fn function_is_at_top_level(node: &Node, _src: &str) -> Option<Violation> {
fn external_function(node: &Node, _src: &str) -> Option<Violation> {
if node.parent()?.kind() == "translation_unit" {
let msg = format!(
"{} not contained within (sub)module or program",
Expand All @@ -15,21 +15,6 @@ fn function_is_at_top_level(node: &Node, _src: &str) -> Option<Violation> {
None
}

fn external_function(node: &Node, _src: &str) -> Vec<Violation> {
let mut violations = Vec::new();
let mut cursor = node.walk();
for child in node.named_children(&mut cursor) {
let kind = child.kind();
if kind == "function" || kind == "subroutine" {
if let Some(x) = function_is_at_top_level(&child, _src) {
violations.push(x);
}
}
violations.extend(external_function(&child, _src));
}
violations
}

pub struct ExternalFunction {}

impl Rule for ExternalFunction {
Expand All @@ -44,6 +29,10 @@ impl Rule for ExternalFunction {
defined outside of these scopes, and this is a common source of bugs.
"
}

fn entrypoints(&self) -> Vec<&str> {
vec!["function", "subroutine"]
}
}

#[cfg(test)]
Expand All @@ -54,7 +43,7 @@ mod tests {
use textwrap::dedent;

#[test]
fn test_function_not_in_module() {
fn test_function_not_in_module() -> Result<(), String> {
let source = dedent(
"
integer function double(x)
Expand All @@ -75,11 +64,12 @@ mod tests {
violation!(&msg, *line, *col)
})
.collect();
test_tree_method(external_function, source, Some(expected_violations));
test_tree_method(&ExternalFunction {}, source, Some(expected_violations))?;
Ok(())
}

#[test]
fn test_function_in_module() {
fn test_function_in_module() -> Result<(), String> {
let source = "
module my_module
implicit none
Expand All @@ -95,6 +85,7 @@ mod tests {
end subroutine
end module
";
test_tree_method(external_function, source, None);
test_tree_method(&ExternalFunction {}, source, None)?;
Ok(())
}
}
Loading