uudoc,uucore_procs: move md parsing to help_parser

uutils · Mar 23, 2023 · 6265cf9 · 6265cf9
1 parent 57541db
commit 6265cf9
Show file tree

Hide file tree

Showing 7 changed files with 282 additions and 282 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -359,6 +359,8 @@ selinux = { workspace=true, optional = true }
 textwrap = { workspace=true }
 zip = { workspace=true, optional = true }
 
+help_parser = { path="src/help_parser" }
+
 # * uutils
 uu_test  = { optional=true, version="0.0.17", package="uu_test", path="src/uu/test" }
 #
@@ -512,3 +514,6 @@ path = "src/bin/coreutils.rs"
 name = "uudoc"
 path = "src/bin/uudoc.rs"
 required-features = ["uudoc"]
+
+[package.metadata.cargo-udeps.ignore]
+normal = ["help_parser"]
diff --git a/src/bin/uudoc.rs b/src/bin/uudoc.rs
@@ -133,7 +133,7 @@ impl<'a, 'b> MDWriter<'a, 'b> {
         write!(self.w, "# {}\n\n", self.name)?;
         self.additional()?;
         self.usage()?;
-        self.description()?;
+        self.about()?;
         self.options()?;
         self.after_help()?;
         self.examples()
@@ -177,54 +177,34 @@ impl<'a, 'b> MDWriter<'a, 'b> {
     }
 
     fn usage(&mut self) -> io::Result<()> {
-        writeln!(self.w, "\n```")?;
-        let mut usage: String = self
-            .command
-            .render_usage()
-            .to_string()
-            .lines()
-            .map(|l| l.strip_prefix("Usage:").unwrap_or(l))
-            .map(|l| l.trim())
-            .filter(|l| !l.is_empty())
-            .collect::<Vec<_>>()
-            .join("\n");
-        usage = usage
-            .to_string()
-            .replace(uucore::execution_phrase(), self.name);
-        writeln!(self.w, "{}", usage)?;
-        writeln!(self.w, "```")
-    }
+        if let Some(markdown) = &self.markdown {
+            let usage = help_parser::parse_usage(&markdown);
+            let usage = usage.replace("{}", self.name);
 
-    fn description(&mut self) -> io::Result<()> {
-        if let Some(after_help) = self.markdown_section("about") {
-            return writeln!(self.w, "\n\n{}", after_help);
+            writeln!(self.w, "\n```")?;
+            writeln!(self.w, "{}", usage)?;
+            writeln!(self.w, "```")
+        } else {
+            Ok(())
         }
+    }
 
-        if let Some(about) = self
-            .command
-            .get_long_about()
-            .or_else(|| self.command.get_about())
-        {
-            writeln!(self.w, "{}", about)
+    fn about(&mut self) -> io::Result<()> {
+        if let Some(markdown) = &self.markdown {
+            writeln!(self.w, "{}", help_parser::parse_about(&markdown))
         } else {
             Ok(())
         }
     }
 
     fn after_help(&mut self) -> io::Result<()> {
-        if let Some(after_help) = self.markdown_section("after help") {
-            return writeln!(self.w, "\n\n{}", after_help);
+        if let Some(markdown) = &self.markdown {
+            if let Some(after_help) = help_parser::parse_section("after help", &markdown) {
+                return writeln!(self.w, "\n\n{after_help}");
+            }
         }
 
-        if let Some(after_help) = self
-            .command
-            .get_after_long_help()
-            .or_else(|| self.command.get_after_help())
-        {
-            writeln!(self.w, "\n\n{}", after_help)
-        } else {
-            Ok(())
-        }
+        Ok(())
     }
 
     fn examples(&mut self) -> io::Result<()> {
@@ -327,32 +307,6 @@ impl<'a, 'b> MDWriter<'a, 'b> {
         }
         writeln!(self.w, "</dl>\n")
     }
-
-    fn markdown_section(&self, section: &str) -> Option<String> {
-        let md = self.markdown.as_ref()?;
-        let section = section.to_lowercase();
-
-        fn is_section_header(line: &str, section: &str) -> bool {
-            line.strip_prefix("##")
-                .map_or(false, |l| l.trim().to_lowercase() == section)
-        }
-
-        let result = md
-            .lines()
-            .skip_while(|&l| !is_section_header(l, &section))
-            .skip(1)
-            .take_while(|l| !l.starts_with("##"))
-            .collect::<Vec<_>>()
-            .join("\n")
-            .trim()
-            .to_string();
-
-        if !result.is_empty() {
-            Some(result)
-        } else {
-            None
-        }
-    }
 }
 
 fn get_zip_content(archive: &mut ZipArchive<impl Read + Seek>, name: &str) -> Option<String> {

diff --git a/src/help_parser/Cargo.toml b/src/help_parser/Cargo.toml
@@ -0,0 +1,5 @@
+[package]
+name = "help_parser"
+version = "0.0.17"
+edition = "2021"
+license = "MIT"
diff --git a/src/help_parser/src/lib.rs b/src/help_parser/src/lib.rs
@@ -0,0 +1,236 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+
+//! A collection of functions to parse the markdown code of help files.
+//!
+//! The structure of the markdown code is assumed to be:
+//!
+//! # util name
+//!
+//! ```text
+//! usage info
+//! ```
+//!
+//! About text
+//!
+//! ## Section 1
+//!
+//! Some content
+//!
+//! ## Section 2
+//!
+//! Some content
+
+const MARKDOWN_CODE_FENCES: &str = "```"; // a line starting with this opens or closes a code block
+
+/// Returns the trimmed text between the first fenced code block and the next line starting
+/// with `#` (or the end of input); the result is empty if there is no such text.
+pub fn parse_about(content: &str) -> String {
+    content
+        .lines()
+        .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) // skip up to the opening fence
+        .skip(1) // drop the opening fence line itself
+        .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) // skip the code block's content
+        .skip(1) // drop the closing fence line
+        .take_while(|l| !l.starts_with('#')) // stop at the next header of any level
+        .collect::<Vec<_>>()
+        .join("\n")
+        .trim()
+        .to_string()
+}
+
+/// Parses the first markdown code block into a usage string.
+///
+/// The code fences are removed and, on every line of the block, everything up to the
+/// first space (assumed to be the util name) is replaced with `{}` so that the caller
+/// can substitute the appropriate name at runtime. The result is trimmed.
+pub fn parse_usage(content: &str) -> String {
+    content
+        .lines()
+        .skip_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) // skip up to the opening fence
+        .skip(1) // drop the opening fence line itself
+        .take_while(|l| !l.starts_with(MARKDOWN_CODE_FENCES)) // keep lines until the closing fence
+        .map(|l| {
+            // Replace the util name (assumed to be the first word) with "{}"
+            // to be replaced with the runtime value later.
+            if let Some((_util, args)) = l.split_once(' ') {
+                format!("{{}} {args}\n")
+            } else {
+                "{}\n".to_string() // no space on the line: the whole line becomes the placeholder
+            }
+        })
+        .collect::<Vec<_>>()
+        .join("")
+        .trim()
+        .to_string()
+}
+
+/// Returns the trimmed body of section `section` (matched case-insensitively), or `None`.
+///
+/// The section must be a second level section (i.e. start with `##`); it ends at the next `## `.
+pub fn parse_section(section: &str, content: &str) -> Option<String> {
+    fn is_section_header(line: &str, section: &str) -> bool {
+        line.strip_prefix("##") // only lines beginning with `##` can be the header
+            .map_or(false, |l| l.trim().to_lowercase() == section)
+    }
+
+    let section = &section.to_lowercase(); // lowercased once so the comparison ignores case
+
+    // The extraction below yields an empty string both for an empty section and for a
+    // missing one, so first check whether the header exists at all.
+    if content.lines().all(|l| !is_section_header(l, section)) {
+        return None;
+    }
+
+    // The prefix includes the trailing space so that level 3-6 headers do not end the section
+    let section_header_prefix = "## ";
+
+    Some(
+        content
+            .lines()
+            .skip_while(|&l| !is_section_header(l, section))
+            .skip(1) // drop the header line itself
+            .take_while(|l| !l.starts_with(section_header_prefix))
+            .collect::<Vec<_>>()
+            .join("\n")
+            .trim()
+            .to_string(),
+    )
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_section() {
+        let input = "\
+            # ls\n\
+            ## some section\n\
+            This is some section\n\
+            \n\
+            ## ANOTHER SECTION\n\
+            This is the other section\n\
+            with multiple lines\n";
+
+        assert_eq!(
+            parse_section("some section", input).unwrap(),
+            "This is some section"
+        );
+        assert_eq!(
+            parse_section("SOME SECTION", input).unwrap(), // the lookup is case-insensitive
+            "This is some section"
+        );
+        assert_eq!(
+            parse_section("another section", input).unwrap(),
+            "This is the other section\nwith multiple lines"
+        );
+    }
+
+    #[test]
+    fn test_parse_section_with_sub_headers() {
+        let input = "\
+            # ls\n\
+            ## after section\n\
+            This is some section\n\
+            \n\
+            ### level 3 header\n\
+            \n\
+            Additional text under the section.\n\
+            \n\
+            #### level 4 header\n\
+            \n\
+            Yet another paragraph\n";
+
+        assert_eq!(
+            parse_section("after section", input).unwrap(), // sub-headers stay in the section
+            "This is some section\n\n\
+            ### level 3 header\n\n\
+            Additional text under the section.\n\n\
+            #### level 4 header\n\n\
+            Yet another paragraph"
+        );
+    }
+
+    #[test]
+    fn test_parse_non_existing_section() {
+        let input = "\
+            # ls\n\
+            ## some section\n\
+            This is some section\n\
+            \n\
+            ## ANOTHER SECTION\n\
+            This is the other section\n\
+            with multiple lines\n";
+
+        assert!(parse_section("non-existing section", input).is_none());
+    }
+
+    #[test]
+    fn test_parse_usage() {
+        let input = "\
+            # ls\n\
+            ```\n\
+            ls -l\n\
+            ```\n\
+            ## some section\n\
+            This is some section\n\
+            \n\
+            ## ANOTHER SECTION\n\
+            This is the other section\n\
+            with multiple lines\n";
+
+        assert_eq!(parse_usage(input), "{} -l"); // the util name becomes the placeholder
+    }
+
+    #[test]
+    fn test_parse_multi_line_usage() {
+        let input = "\
+            # ls\n\
+            ```\n\
+            ls -a\n\
+            ls -b\n\
+            ls -c\n\
+            ```\n\
+            ## some section\n\
+            This is some section\n";
+
+        assert_eq!(parse_usage(input), "{} -a\n{} -b\n{} -c"); // replaced on every line
+    }
+
+    #[test]
+    fn test_parse_about() {
+        let input = "\
+            # ls\n\
+            ```\n\
+            ls -l\n\
+            ```\n\
+            \n\
+            This is the about section\n\
+            \n\
+            ## some section\n\
+            This is some section\n";
+
+        assert_eq!(parse_about(input), "This is the about section");
+    }
+
+    #[test]
+    fn test_parse_multi_line_about() {
+        let input = "\
+            # ls\n\
+            ```\n\
+            ls -l\n\
+            ```\n\
+            \n\
+            about a\n\
+            \n\
+            about b\n\
+            \n\
+            ## some section\n\
+            This is some section\n";
+
+        assert_eq!(parse_about(input), "about a\n\nabout b"); // inner blank line is preserved
+    }
+}