Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use lazy-regex crate instead of plain regex #27

Merged
merged 1 commit into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ anyhow = "1.0.79"
clap = { version = "4.4.16", features = ["derive"] }
colored = "2.1.0"
itertools = "0.12.0"
regex = "1.10.2"
lazy-regex = "3.3.0"
textwrap = "0.16.0"
tree-sitter = "~0.22.6"
tree-sitter-fortran = "0.0.1"
Expand Down
8 changes: 2 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mod settings;
mod test_utils;
use anyhow::Context;
use colored::Colorize;
use regex::Regex;
use lazy_regex::regex_captures;
use settings::Settings;
use std::cmp::Ordering;
use std::fmt;
Expand Down Expand Up @@ -74,12 +74,8 @@ impl Code {
}

pub fn from(code_str: &str) -> anyhow::Result<Self> {
let re = Regex::new(r"^([A-Z]+)(\d{3})$")?;
let captures = re
.captures(code_str)
let (_, category_str, number_str) = regex_captures!(r"^([A-Z]+)([0-9]{3})$", code_str)
.context(format!("{} is not a valid error code.", code_str))?;
let category_str = captures.get(1).map_or("", |x| x.as_str());
let number_str = captures.get(2).map_or("", |x| x.as_str());
let category = Category::from(category_str)?;
let number = number_str.parse::<usize>()?;
Ok(Code::new(category, number))
Expand Down
13 changes: 5 additions & 8 deletions src/rules/style/line_length.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,20 @@
use crate::settings::Settings;
use crate::violation;
use crate::{Method, Rule, Violation};
use regex::Regex;
use lazy_regex::regex_is_match;
/// Defines rules that govern line length.

pub struct LineTooLong {}

fn line_too_long(source: &str, settings: &Settings) -> Vec<Violation> {
let mut violations = Vec::new();

// Are we ending on a string or comment? If so, we'll allow it through, as
// it may contain something like a long URL that cannot be reasonably split
// across multiple lines.
let re = Regex::new(r#"(["']\w*&?$)|(!.*$)|(^\w*&)"#).unwrap();

for (idx, line) in source.split('\n').enumerate() {
let len = line.len();
if len > settings.line_length {
if re.is_match(line) {
// Are we ending on a string or comment? If so, we'll allow it through, as it may
// contain something like a long URL that cannot be reasonably split across multiple
// lines.
if regex_is_match!(r#"(["']\w*&?$)|(!.*$)|(^\w*&)"#, line) {
continue;
}
let msg = format!(
Expand Down
5 changes: 2 additions & 3 deletions src/rules/typing/literal_kinds.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{Method, Rule, Violation};
use regex::Regex;
use lazy_regex::regex_is_match;
use tree_sitter::{Node, Query};
/// Defines rules that discourage the use of raw number literals as kinds, as this can result in
/// non-portable code.
Expand Down Expand Up @@ -139,7 +139,6 @@ fn literal_kind_suffix(root: &Node, src: &str) -> Vec<Violation> {
let mut violations = Vec::new();
// Given a number literal, match anything suffixed with plain number.
// TODO Match either int or real, change error message accordingly
let re = Regex::new(r"_\d+$").unwrap();

let query_txt = "(number_literal) @num";
let query = Query::new(&tree_sitter_fortran::language(), query_txt).unwrap();
Expand All @@ -149,7 +148,7 @@ fn literal_kind_suffix(root: &Node, src: &str) -> Vec<Violation> {
let txt = capture.node.utf8_text(src.as_bytes());
match txt {
Ok(x) => {
if re.is_match(x) {
if regex_is_match!(r"_\d+$", x) {
let msg = format!(
"Instead of number literal suffix in {}, use parameter suffix \
from 'iso_fortran_env'",
Expand Down
14 changes: 6 additions & 8 deletions src/rules/typing/real_precision.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{Method, Rule, Violation};
use regex::Regex;
use lazy_regex::regex_is_match;
use tree_sitter::{Node, Query};
/// Defines rules that ensure real precision is always explicit and stated in a portable way.

Expand Down Expand Up @@ -77,12 +77,6 @@ pub struct NoRealSuffix {}

fn no_real_suffix(root: &Node, src: &str) -> Vec<Violation> {
let mut violations = Vec::new();
// Given a number literal, match anything with a decimal place, some amount of
// digits either side, and no suffix. This will not catch exponentiation.
// Tree sitter will also not include a + or - prefix within the number literal,
// considering this to be a unary operator.
let re = Regex::new(r"^\d*\.\d*$").unwrap();

let query_txt = "(number_literal) @num";
let query = Query::new(&tree_sitter_fortran::language(), query_txt).unwrap();
let mut cursor = tree_sitter::QueryCursor::new();
Expand All @@ -91,7 +85,11 @@ fn no_real_suffix(root: &Node, src: &str) -> Vec<Violation> {
let txt = capture.node.utf8_text(src.as_bytes());
match txt {
Ok(x) => {
if re.is_match(x) {
// Given a number literal, match anything with a decimal point, any number of digits
// on either side, and no kind suffix. This will not catch literals written with an
// exponent (e.g. `1.0e5`). Tree-sitter also does not include a + or - prefix within
// the number literal, considering this to be a unary operator.
if regex_is_match!(r"^\d*\.\d*$", x) {
let msg = format!(
"Floating point literal {} specified without a kind suffix",
x,
Expand Down
12 changes: 5 additions & 7 deletions src/rules/typing/star_kinds.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{Method, Rule, Violation};
use regex::Regex;
use lazy_regex::regex_is_match;
use tree_sitter::{Node, Query};
/// Defines rules that discourage the use of non-standard kind specifiers such as
/// `integer*4` or `real*8`. Also prefers the use of `character(len=*)` to
Expand All @@ -12,11 +12,6 @@ fn star_kind(root: &Node, src: &str) -> Vec<Violation> {
// Note: This does not match 'character*(*)', which should be handled by a different
// rule.
let mut violations = Vec::new();
// Match anything beginning with a '*' followed by any amount of whitespace or '&'
// symbols (in case you like to split your type specifiers over multiple lines),
// followed by at least one digit.
let re = Regex::new(r"^\*[\s&]*\d+").unwrap();

for query_type in ["function_statement", "variable_declaration"] {
let query_txt = format!("({} (intrinsic_type) (size) @size)", query_type);
let query = Query::new(&tree_sitter_fortran::language(), &query_txt).unwrap();
Expand All @@ -25,7 +20,10 @@ fn star_kind(root: &Node, src: &str) -> Vec<Violation> {
for capture in match_.captures {
match capture.node.utf8_text(src.as_bytes()) {
Ok(x) => {
if re.is_match(x) {
// Match anything beginning with a '*' followed by any amount of whitespace
// or '&' symbols (in case you like to split your type specifiers over
// multiple lines), followed by at least one digit.
if regex_is_match!(r"^\*[\s&]*\d+", x) {
let msg = "Avoid non-standard 'type*N', prefer 'type(N)'";
violations.push(Violation::from_node(msg, &capture.node));
}
Expand Down
Loading