Skip to content

Commit

Permalink
refactor: doc string parse to prevent the regex calling and remove th…
Browse files Browse the repository at this point in the history
…e pcre2 deps (#1219)

refactor: parse doc strings without calling the regex engine

Signed-off-by: peefy <[email protected]>
  • Loading branch information
Peefy authored Apr 15, 2024
1 parent f5c3efd commit bc61341
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 62 deletions.
1 change: 0 additions & 1 deletion kclvm/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion kclvm/sema/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ petgraph = "0.6.0"
anyhow = "1.0"
regex = "1.7.0"
lazy_static = "1.4.0"
pcre2 = "*"

kclvm-ast = { path = "../ast" }
kclvm-ast-pretty = { path = "../ast_pretty" }
Expand Down
95 changes: 35 additions & 60 deletions kclvm/sema/src/resolver/doc.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,29 @@
use kclvm_ast::ast::SchemaStmt;
use pcre2::bytes::Regex;
use std::collections::{HashMap, HashSet};
use std::iter::Iterator;
use std::str;

lazy_static::lazy_static! {
static ref RE: Regex = Regex::new(r#"(?s)^(['\"]{3})(.*?)(['\"]{3})$"#).unwrap();
}

/// strip leading and trailing triple quotes from the original docstring content
fn strip_quotes(original: &mut String) {
let quote = original.chars().next().unwrap();
if quote != '"' && quote != '\'' {
return;
}
if let Ok(Some(mat)) = RE.find(original.as_bytes()) {
let content = str::from_utf8(&original.as_bytes()[mat.start() + 3..mat.end() - 3])
.unwrap()
.to_owned();
*original = content;
/// Triple single-quote delimiter that may wrap a docstring.
const SINGLE_QUOTES_STR: &str = "'''";
/// Triple double-quote delimiter that may wrap a docstring.
const DOUBLE_QUOTES_STR: &str = "\"\"\"";

/// Removes one pair of matching triple quotes wrapping `original`.
///
/// The delimiter is chosen from the first character: `'` selects `'''` and
/// `"` selects `"""`. The input is returned unchanged when it starts with any
/// other character, is empty, or does not both begin and end with the same
/// delimiter (the closing delimiter is looked for only after the opening one
/// has been removed, so a bare `'''` is left as is).
///
/// The result is always a sub-slice of `original`; nothing is allocated.
fn strip_quotes(original: &str) -> &str {
    // Decide which delimiter (if any) the text could be wrapped in.
    let delimiter = if original.starts_with('\'') {
        SINGLE_QUOTES_STR
    } else if original.starts_with('"') {
        DOUBLE_QUOTES_STR
    } else {
        return original;
    };

    // Both ends must carry the delimiter, otherwise keep the text untouched.
    original
        .strip_prefix(delimiter)
        .and_then(|inner| inner.strip_suffix(delimiter))
        .unwrap_or(original)
}

Expand All @@ -27,7 +32,7 @@ fn expand_tabs(s: &str, spaces_per_tab: usize) -> String {
}

/// Clean up indentation by removing any common leading whitespace on all lines after the first line.
fn clean_doc(doc: &mut String) {
fn clean_doc(doc: &str) -> String {
let tab_expanded = expand_tabs(&doc, 4);
let mut lines: Vec<&str> = tab_expanded.split('\n').collect();
// Find minimum indentation of any non-blank lines after first line.
Expand Down Expand Up @@ -60,7 +65,7 @@ fn clean_doc(doc: &mut String) {
lines.remove(0);
}
}
*doc = lines.join("\n");
lines.join("\n")
}

/// A line-based string reader.
Expand Down Expand Up @@ -158,27 +163,6 @@ impl Reader {
}
}

/// Removes the leading and trailing empty lines from `doc`.
///
/// A line counts as empty when it contains nothing but whitespace. Empty
/// lines located between two non-empty lines are preserved.
///
/// Returns an empty vector when `doc` is empty or consists solely of empty
/// lines; slicing with fixed `0` defaults would panic on an empty input and
/// would keep one blank line for an all-blank input.
fn _strip(mut doc: Vec<String>) -> Vec<String> {
    let has_content = |line: &String| !line.trim().is_empty();

    let first = doc.iter().position(has_content);
    match first {
        // Nothing but blank lines (or no lines at all): nothing survives.
        None => Vec::new(),
        Some(first) => {
            // A non-empty line exists, so `rposition` always finds one; the
            // fallback only keeps this expression panic-free.
            let last = doc.iter().rposition(has_content).unwrap_or(first);
            // Trim in place instead of cloning the retained lines: cut the
            // tail first so that `first` remains a valid index.
            doc.truncate(last + 1);
            doc.drain(..first);
            doc
        }
    }
}

/// Checks if current line is at the beginning of a section
fn is_at_section(doc: &mut Reader) -> bool {
doc.seek_next_non_empty_line();
Expand All @@ -202,7 +186,7 @@ fn is_at_section(doc: &mut Reader) -> bool {
l2.starts_with(&"-".repeat(l1.len())) || l2.starts_with(&"=".repeat(l1.len()))
}

/// read lines before next section beginning, continuous empty lines will be merged to one
/// Reads lines up to the beginning of the next section; consecutive empty lines are merged into one.
fn read_to_next_section(doc: &mut Reader) -> Vec<String> {
let mut section = doc.read_to_next_empty_line();

Expand All @@ -215,7 +199,7 @@ fn read_to_next_section(doc: &mut Reader) -> Vec<String> {
section
}

/// parse the Attribute Section of the docstring to list of Attribute
/// Parse the Attribute Section of the docstring to list of Attribute
fn parse_attr_list(content: String) -> Vec<Attribute> {
let mut r = Reader::new(content);
let mut attrs = vec![];
Expand All @@ -239,7 +223,7 @@ fn parse_attr_list(content: String) -> Vec<Attribute> {
attrs
}

/// parse the summary of the schema. The final summary content will be a concat of lines in the original summary with whitespace.
/// Parse the summary of the schema. The final summary is the lines of the original summary joined with whitespace.
fn parse_summary(doc: &mut Reader) -> String {
if is_at_section(doc) {
// no summary provided
Expand All @@ -255,17 +239,14 @@ fn parse_summary(doc: &mut Reader) -> String {
.to_string()
}

/// parse the schema docstring to Doc.
/// Parse the schema docstring to Doc.
/// The summary of the schema will be joined into a single-line string with whitespace.
/// The description of each attribute will be returned as separate lines.
pub fn parse_doc_string(ori: &String) -> Doc {
pub fn parse_doc_string(ori: &str) -> Doc {
if ori.is_empty() {
return Doc::new("".to_string(), vec![], HashMap::new());
}
let mut ori = ori.clone();
strip_quotes(&mut ori);
clean_doc(&mut ori);
let mut doc = Reader::new(ori);
let mut doc = Reader::new(clean_doc(strip_quotes(&ori)));
doc.reset();
let summary = parse_summary(&mut doc);

Expand Down Expand Up @@ -441,17 +422,13 @@ de",
];

for (ori, res) in oris.iter().zip(results.iter()) {
let from = &mut ori.to_string();
strip_quotes(from);
assert_eq!(from.to_string(), res.to_string());
assert_eq!(strip_quotes(ori).to_string(), res.to_string());
}
}

#[test]
fn test_clean_doc() {
let mut ori = read_doc_content();
strip_quotes(&mut ori);
clean_doc(&mut ori);
let ori = clean_doc(strip_quotes(&read_doc_content()));
let expect_cleaned = r#"Server is the common user interface for long-running
services adopting the best practice of Kubernetes.
Expand Down Expand Up @@ -566,14 +543,13 @@ unindented line

#[test]
fn test_at_section() {
let mut data = "Summary
let data = "Summary
Attribute
---------
description"
.to_string();

clean_doc(&mut data);

let data = clean_doc(&data);
let mut doc = Reader::new(data);
assert!(!is_at_section(&mut doc));

Expand All @@ -586,7 +562,7 @@ unindented line

#[test]
fn test_read_to_next_section() {
let mut data = "Summary
let data = "Summary
SummaryContinue
Expand All @@ -610,8 +586,7 @@ unindented line
--------
content"
.to_string();
clean_doc(&mut data);

let data = clean_doc(&data);
let mut doc = Reader::new(data);
assert_eq!(
read_to_next_section(&mut doc),
Expand Down

0 comments on commit bc61341

Please sign in to comment.