From 6265cf9b159db093788359249d0fa7c93d1c95f1 Mon Sep 17 00:00:00 2001 From: Daniel Hofstetter Date: Fri, 17 Mar 2023 08:55:08 +0100 Subject: [PATCH] uudoc,uucore_procs: move md parsing to help_parser --- Cargo.lock | 6 + Cargo.toml | 5 + src/bin/uudoc.rs | 82 +++---------- src/help_parser/Cargo.toml | 5 + src/help_parser/src/lib.rs | 236 ++++++++++++++++++++++++++++++++++++ src/uucore_procs/Cargo.toml | 1 + src/uucore_procs/src/lib.rs | 229 ++-------------------------------- 7 files changed, 282 insertions(+), 282 deletions(-) create mode 100644 src/help_parser/Cargo.toml create mode 100644 src/help_parser/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index c86edc12a2c..aaf52508675 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -327,6 +327,7 @@ dependencies = [ "conv", "filetime", "glob", + "help_parser", "hex-literal", "is-terminal", "libc", @@ -1036,6 +1037,10 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" +[[package]] +name = "help_parser" +version = "0.0.17" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -3355,6 +3360,7 @@ dependencies = [ name = "uucore_procs" version = "0.0.17" dependencies = [ + "help_parser", "proc-macro2", "quote", ] diff --git a/Cargo.toml b/Cargo.toml index 1331839b207..f3f0acbe819 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -359,6 +359,8 @@ selinux = { workspace=true, optional = true } textwrap = { workspace=true } zip = { workspace=true, optional = true } +help_parser = { path="src/help_parser" } + # * uutils uu_test = { optional=true, version="0.0.17", package="uu_test", path="src/uu/test" } # @@ -512,3 +514,6 @@ path = "src/bin/coreutils.rs" name = "uudoc" path = "src/bin/uudoc.rs" required-features = ["uudoc"] + +[package.metadata.cargo-udeps.ignore] +normal = ["help_parser"] diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs index 8276d2ae129..7d1a9cff55f 100644 --- a/src/bin/uudoc.rs +++ b/src/bin/uudoc.rs @@ -133,7 +133,7 @@ impl<'a, 'b> MDWriter<'a, 'b> { write!(self.w, "# {}\n\n", self.name)?; self.additional()?; self.usage()?; - self.description()?; + self.about()?; self.options()?; self.after_help()?; self.examples() @@ -177,54 +177,34 @@ impl<'a, 'b> MDWriter<'a, 'b> { } fn usage(&mut self) -> io::Result<()> { - writeln!(self.w, "\n```")?; - let mut usage: String = self - .command - .render_usage() - .to_string() - .lines() - .map(|l| l.strip_prefix("Usage:").unwrap_or(l)) - .map(|l| l.trim()) - .filter(|l| !l.is_empty()) - .collect::>() - .join("\n"); - usage = usage - .to_string() - .replace(uucore::execution_phrase(), self.name); - writeln!(self.w, "{}", usage)?; - writeln!(self.w, "```") - } + if let Some(markdown) = &self.markdown { + let usage = help_parser::parse_usage(&markdown); + let usage = usage.replace("{}", self.name); - fn description(&mut self) -> io::Result<()> { - if let Some(after_help) = self.markdown_section("about") { - return writeln!(self.w, "\n\n{}", after_help); + writeln!(self.w, "\n```")?; + writeln!(self.w, "{}", usage)?; + writeln!(self.w, "```") + } else { + Ok(()) } + } - if let Some(about) = self - .command - .get_long_about() - .or_else(|| self.command.get_about()) - { - writeln!(self.w, "{}", about) + fn about(&mut self) -> io::Result<()> { + if let Some(markdown) = &self.markdown { + writeln!(self.w, "{}", help_parser::parse_about(&markdown)) } else { Ok(()) } } fn after_help(&mut self) -> io::Result<()> { - if let Some(after_help) = self.markdown_section("after help") { - return writeln!(self.w, "\n\n{}", after_help); + if let Some(markdown) = &self.markdown { + if let Some(after_help) = help_parser::parse_section("after help", &markdown) { + return writeln!(self.w, "\n\n{after_help}"); + } } - if let Some(after_help) = self - .command - .get_after_long_help() - .or_else(|| self.command.get_after_help()) - { - writeln!(self.w, "\n\n{}", after_help) - } else { - Ok(()) - } + Ok(()) } fn examples(&mut self) -> io::Result<()> { @@ -327,32 +307,6 @@ impl<'a, 'b> MDWriter<'a, 'b> { } writeln!(self.w, "\n") } - - fn markdown_section(&self, section: &str) -> Option { - let md = self.markdown.as_ref()?; - let section = section.to_lowercase(); - - fn is_section_header(line: &str, section: &str) -> bool { - line.strip_prefix("##") - .map_or(false, |l| l.trim().to_lowercase() == section) - } - - let result = md - .lines() - .skip_while(|&l| !is_section_header(l, §ion)) - .skip(1) - .take_while(|l| !l.starts_with("##")) - .collect::>() - .join("\n") - .trim() - .to_string(); - - if !result.is_empty() { - Some(result) - } else { - None - } - } } fn get_zip_content(archive: &mut ZipArchive, name: &str) -> Option { diff --git a/src/help_parser/Cargo.toml b/src/help_parser/Cargo.toml new file mode 100644 index 00000000000..b0ed216691f --- /dev/null +++ b/src/help_parser/Cargo.toml @@ -0,0 +1,5 @@ +[package] +name = "help_parser" +version = "0.0.17" +edition = "2021" +license = "MIT" diff --git a/src/help_parser/src/lib.rs b/src/help_parser/src/lib.rs new file mode 100644 index 00000000000..8faa4e6ce4d --- /dev/null +++ b/src/help_parser/src/lib.rs @@ -0,0 +1,236 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +//! A collection of functions to parse the markdown code of help files. +//! +//! The structure of the markdown code is assumed to be: +//! +//! # util name +//! +//! ```text +//! usage info +//! ``` +//! +//! About text +//! +//! ## Section 1 +//! +//! Some content +//! +//! ## Section 2 +//! +//! Some content + +const MARKDOWN_CODE_FENCES: &str = "```"; + +/// Parses the text between the first markdown code block and the next header, if any, +/// into an about string. +pub fn parse_about(content: &str) -> String { + content + .lines() + .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) + .skip(1) + .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) + .skip(1) + .take_while(|l| !l.starts_with('#')) + .collect::>() + .join("\n") + .trim() + .to_string() +} + +/// Parses the first markdown code block into a usage string +/// +/// The code fences are removed and the name of the util is replaced +/// with `{}` so that it can be replaced with the appropriate name +/// at runtime. +pub fn parse_usage(content: &str) -> String { + content + .lines() + .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) + .skip(1) + .take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) + .map(|l| { + // Replace the util name (assumed to be the first word) with "{}" + // to be replaced with the runtime value later. + if let Some((_util, args)) = l.split_once(' ') { + format!("{{}} {args}\n") + } else { + "{}\n".to_string() + } + }) + .collect::>() + .join("") + .trim() + .to_string() +} + +/// Get a single section from content +/// +/// The section must be a second level section (i.e. start with `##`). +pub fn parse_section(section: &str, content: &str) -> Option { + fn is_section_header(line: &str, section: &str) -> bool { + line.strip_prefix("##") + .map_or(false, |l| l.trim().to_lowercase() == section) + } + + let section = §ion.to_lowercase(); + + // We cannot distinguish between an empty or non-existing section below, + // so we do a quick test to check whether the section exists + if content.lines().all(|l| !is_section_header(l, section)) { + return None; + } + + // Prefix includes space to allow processing of section with level 3-6 headers + let section_header_prefix = "## "; + + Some( + content + .lines() + .skip_while(|&l| !is_section_header(l, section)) + .skip(1) + .take_while(|l| !l.starts_with(section_header_prefix)) + .collect::>() + .join("\n") + .trim() + .to_string(), + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_section() { + let input = "\ + # ls\n\ + ## some section\n\ + This is some section\n\ + \n\ + ## ANOTHER SECTION + This is the other section\n\ + with multiple lines\n"; + + assert_eq!( + parse_section("some section", input).unwrap(), + "This is some section" + ); + assert_eq!( + parse_section("SOME SECTION", input).unwrap(), + "This is some section" + ); + assert_eq!( + parse_section("another section", input).unwrap(), + "This is the other section\nwith multiple lines" + ); + } + + #[test] + fn test_parse_section_with_sub_headers() { + let input = "\ + # ls\n\ + ## after section\n\ + This is some section\n\ + \n\ + ### level 3 header\n\ + \n\ + Additional text under the section.\n\ + \n\ + #### level 4 header\n\ + \n\ + Yet another paragraph\n"; + + assert_eq!( + parse_section("after section", input).unwrap(), + "This is some section\n\n\ + ### level 3 header\n\n\ + Additional text under the section.\n\n\ + #### level 4 header\n\n\ + Yet another paragraph" + ); + } + + #[test] + fn test_parse_non_existing_section() { + let input = "\ + # ls\n\ + ## some section\n\ + This is some section\n\ + \n\ + ## ANOTHER SECTION + This is the other section\n\ + with multiple lines\n"; + + assert!(parse_section("non-existing section", input).is_none()); + } + + #[test] + fn test_parse_usage() { + let input = "\ + # ls\n\ + ```\n\ + ls -l\n\ + ```\n\ + ## some section\n\ + This is some section\n\ + \n\ + ## ANOTHER SECTION + This is the other section\n\ + with multiple lines\n"; + + assert_eq!(parse_usage(input), "{} -l"); + } + + #[test] + fn test_parse_multi_line_usage() { + let input = "\ + # ls\n\ + ```\n\ + ls -a\n\ + ls -b\n\ + ls -c\n\ + ```\n\ + ## some section\n\ + This is some section\n"; + + assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c"); + } + + #[test] + fn test_parse_about() { + let input = "\ + # ls\n\ + ```\n\ + ls -l\n\ + ```\n\ + \n\ + This is the about section\n\ + \n\ + ## some section\n\ + This is some section\n"; + + assert_eq!(parse_about(input), "This is the about section"); + } + + #[test] + fn test_parse_multi_line_about() { + let input = "\ + # ls\n\ + ```\n\ + ls -l\n\ + ```\n\ + \n\ + about a\n\ + \n\ + about b\n\ + \n\ + ## some section\n\ + This is some section\n"; + + assert_eq!(parse_about(input), "about a\n\nabout b"); + } +} diff --git a/src/uucore_procs/Cargo.toml b/src/uucore_procs/Cargo.toml index c61d6367343..d5db5749e81 100644 --- a/src/uucore_procs/Cargo.toml +++ b/src/uucore_procs/Cargo.toml @@ -18,3 +18,4 @@ proc-macro = true [dependencies] proc-macro2 = "1.0" quote = "1.0" +help_parser = { path="../help_parser", version="0.0.17" } diff --git a/src/uucore_procs/src/lib.rs b/src/uucore_procs/src/lib.rs index ab2458cebb5..a92ebdcf3bb 100644 --- a/src/uucore_procs/src/lib.rs +++ b/src/uucore_procs/src/lib.rs @@ -7,8 +7,6 @@ use std::{fs::File, io::Read, path::PathBuf}; use proc_macro::{Literal, TokenStream, TokenTree}; use quote::quote; -const MARKDOWN_CODE_FENCES: &str = "```"; - //## rust proc-macro background info //* ref: @@ //* ref: [path construction from LitStr](https://oschwald.github.io/maxminddb-rust/syn/struct.LitStr.html) @@ @@ -61,7 +59,7 @@ fn render_markdown(s: &str) -> String { pub fn help_about(input: TokenStream) -> TokenStream { let input: Vec = input.into_iter().collect(); let filename = get_argument(&input, 0, "filename"); - let text: String = parse_about(&read_help(&filename)); + let text: String = help_parser::parse_about(&read_help(&filename)); TokenTree::Literal(Literal::string(&text)).into() } @@ -75,7 +73,7 @@ pub fn help_about(input: TokenStream) -> TokenStream { pub fn help_usage(input: TokenStream) -> TokenStream { let input: Vec = input.into_iter().collect(); let filename = get_argument(&input, 0, "filename"); - let text: String = parse_usage(&read_help(&filename)); + let text: String = help_parser::parse_usage(&read_help(&filename)); TokenTree::Literal(Literal::string(&text)).into() } @@ -108,9 +106,15 @@ pub fn help_section(input: TokenStream) -> TokenStream { let input: Vec = input.into_iter().collect(); let section = get_argument(&input, 0, "section"); let filename = get_argument(&input, 1, "filename"); - let text = parse_help_section(§ion, &read_help(&filename)); - let rendered = render_markdown(&text); - TokenTree::Literal(Literal::string(&rendered)).into() + + if let Some(text) = help_parser::parse_section(§ion, &read_help(&filename)) { + let rendered = render_markdown(&text); + TokenTree::Literal(Literal::string(&rendered)).into() + } else { + panic!( + "The section '{section}' could not be found in the help file. Maybe it is spelled wrong?" + ) + } } /// Get an argument from the input vector of `TokenTree`. @@ -149,214 +153,3 @@ fn read_help(filename: &str) -> String { content } - -/// Get a single section from content -/// -/// The section must be a second level section (i.e. start with `##`). -fn parse_help_section(section: &str, content: &str) -> String { - fn is_section_header(line: &str, section: &str) -> bool { - line.strip_prefix("##") - .map_or(false, |l| l.trim().to_lowercase() == section) - } - - let section = §ion.to_lowercase(); - - // We cannot distinguish between an empty or non-existing section below, - // so we do a quick test to check whether the section exists to provide - // a nice error message. - if content.lines().all(|l| !is_section_header(l, section)) { - panic!( - "The section '{section}' could not be found in the help file. Maybe it is spelled wrong?" - ) - } - - // Prefix includes space to allow processing of section with level 3-6 headers - let section_header_prefix = "## "; - - content - .lines() - .skip_while(|&l| !is_section_header(l, section)) - .skip(1) - .take_while(|l| !l.starts_with(section_header_prefix)) - .collect::>() - .join("\n") - .trim() - .to_string() -} - -/// Parses the first markdown code block into a usage string -/// -/// The code fences are removed and the name of the util is replaced -/// with `{}` so that it can be replaced with the appropriate name -/// at runtime. -fn parse_usage(content: &str) -> String { - content - .lines() - .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) - .skip(1) - .take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) - .map(|l| { - // Replace the util name (assumed to be the first word) with "{}" - // to be replaced with the runtime value later. - if let Some((_util, args)) = l.split_once(' ') { - format!("{{}} {args}\n") - } else { - "{}\n".to_string() - } - }) - .collect::>() - .join("") - .trim() - .to_string() -} - -/// Parses the text between the first markdown code block and the next header, if any, -/// into an about string. -fn parse_about(content: &str) -> String { - content - .lines() - .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) - .skip(1) - .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) - .skip(1) - .take_while(|l| !l.starts_with('#')) - .collect::>() - .join("\n") - .trim() - .to_string() -} - -#[cfg(test)] -mod tests { - use super::{parse_about, parse_help_section, parse_usage}; - - #[test] - fn section_parsing() { - let input = "\ - # ls\n\ - ## some section\n\ - This is some section\n\ - \n\ - ## ANOTHER SECTION - This is the other section\n\ - with multiple lines\n"; - - assert_eq!( - parse_help_section("some section", input), - "This is some section" - ); - assert_eq!( - parse_help_section("SOME SECTION", input), - "This is some section" - ); - assert_eq!( - parse_help_section("another section", input), - "This is the other section\nwith multiple lines" - ); - } - - #[test] - fn section_parsing_with_additional_headers() { - let input = "\ - # ls\n\ - ## after section\n\ - This is some section\n\ - \n\ - ### level 3 header\n\ - \n\ - Additional text under the section.\n\ - \n\ - #### level 4 header\n\ - \n\ - Yet another paragraph\n"; - - assert_eq!( - parse_help_section("after section", input), - "This is some section\n\n\ - ### level 3 header\n\n\ - Additional text under the section.\n\n\ - #### level 4 header\n\n\ - Yet another paragraph" - ); - } - - #[test] - #[should_panic] - fn section_parsing_panic() { - let input = "\ - # ls\n\ - ## some section\n\ - This is some section\n\ - \n\ - ## ANOTHER SECTION - This is the other section\n\ - with multiple lines\n"; - parse_help_section("non-existent section", input); - } - - #[test] - fn usage_parsing() { - let input = "\ - # ls\n\ - ```\n\ - ls -l\n\ - ```\n\ - ## some section\n\ - This is some section\n\ - \n\ - ## ANOTHER SECTION - This is the other section\n\ - with multiple lines\n"; - - assert_eq!(parse_usage(input), "{} -l"); - } - - #[test] - fn multi_line_usage_parsing() { - let input = "\ - # ls\n\ - ```\n\ - ls -a\n\ - ls -b\n\ - ls -c\n\ - ```\n\ - ## some section\n\ - This is some section\n"; - - assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c"); - } - - #[test] - fn about_parsing() { - let input = "\ - # ls\n\ - ```\n\ - ls -l\n\ - ```\n\ - \n\ - This is the about section\n\ - \n\ - ## some section\n\ - This is some section\n"; - - assert_eq!(parse_about(input), "This is the about section"); - } - - #[test] - fn multi_line_about_parsing() { - let input = "\ - # ls\n\ - ```\n\ - ls -l\n\ - ```\n\ - \n\ - about a\n\ - \n\ - about b\n\ - \n\ - ## some section\n\ - This is some section\n"; - - assert_eq!(parse_about(input), "about a\n\nabout b"); - } -}