Skip to content

Commit

Permalink
chore: Use lychee as a link checker. (#1328)
Browse files Browse the repository at this point in the history
fixes #1306 by migrating from a node-based link checker to a rust-based link checker
  • Loading branch information
ptondereau authored Oct 4, 2022
1 parent 9b3bc35 commit 803d45f
Show file tree
Hide file tree
Showing 8 changed files with 1,059 additions and 89 deletions.
900 changes: 888 additions & 12 deletions Cargo.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ tar = "0.4"
termimad = "0.20"
tempdir = "0.3"
tempfile = "3.3"
tokio = "1.21"
tokio-stream = "0.1"
toml = "0.5"
tracing = "0.1"
tracing-core = "0.1"
Expand Down
18 changes: 0 additions & 18 deletions mlc_config.json

This file was deleted.

5 changes: 5 additions & 0 deletions xtask/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,10 @@ reqwest = { workspace = true, default-features = false, features = ["blocking",
semver = { workspace = true }
serde_json_traversal = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true, default-features = false, features = ["rt"] }
tokio-stream = { workspace = true }
which = { workspace = true }
zip = { workspace = true, default-features = false }

[target.'cfg(not(windows))'.dependencies]
lychee-lib = { version = "0.10", features = ["vendored-openssl"] }
20 changes: 20 additions & 0 deletions xtask/src/commands/lint.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
use saucer::Result;
use saucer::{clap, Parser};

#[cfg(not(windows))]
use crate::tools::LycheeRunner;

use crate::tools::{CargoRunner, NpmRunner};

#[derive(Debug, Parser)]
Expand All @@ -12,6 +15,23 @@ impl Lint {
cargo_runner.lint()?;
let npm_runner = NpmRunner::new(verbose)?;
npm_runner.lint()?;
lint_links(verbose)?;

Ok(())
}
}

/// Checks the links found in the repository's markdown files.
///
/// Builds a [`LycheeRunner`] with the given verbosity and runs its lint pass,
/// propagating any error from either step.
#[cfg(not(windows))]
fn lint_links(verbose: bool) -> Result<()> {
    LycheeRunner::new(verbose)?.lint()
}

/// Windows stand-in for the link lint step.
///
/// The lychee-based runner is only compiled for non-Windows targets, so on
/// Windows this just reports that the link check is skipped and succeeds.
#[cfg(windows)]
fn lint_links(_verbose: bool) -> Result<()> {
    println!("Skipping the link checker.");

    Ok(())
}
140 changes: 140 additions & 0 deletions xtask/src/tools/lychee.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#[cfg(not(windows))]
use lychee_lib::{
Client, ClientBuilder, Collector, FileType, Input, InputSource, Result as LycheeResult, Uri,
};
#[cfg(not(windows))]
use reqwest::StatusCode;
#[cfg(not(windows))]
use saucer::{anyhow, Result, Utf8PathBuf};
#[cfg(not(windows))]
use std::{collections::HashSet, fs, path::PathBuf, time::Duration};
#[cfg(not(windows))]
use tokio::runtime::Runtime;
#[cfg(not(windows))]
use tokio_stream::StreamExt;

#[cfg(not(windows))]
use crate::utils::PKG_PROJECT_ROOT;

/// Link checker for the repository's markdown files, backed by `lychee_lib`.
#[cfg(not(windows))]
pub(crate) struct LycheeRunner {
// the lychee client used to check each collected link
client: Client,
// when true, `lint` announces that the link check is starting
verbose: bool,
}

#[cfg(not(windows))]
impl LycheeRunner {
    /// Creates a runner with a preconfigured lychee client.
    ///
    /// The client excludes private addresses and mail links, retries up to
    /// five times with a 30 second wait, and accepts `200 OK` as well as
    /// `429 Too Many Requests` responses.
    ///
    /// # Errors
    ///
    /// Returns an error if the lychee client cannot be constructed.
    pub(crate) fn new(verbose: bool) -> Result<Self> {
        let mut accepted_statuses = HashSet::new();
        accepted_statuses.insert(StatusCode::OK);
        accepted_statuses.insert(StatusCode::TOO_MANY_REQUESTS);

        let configured = ClientBuilder::builder()
            .exclude_all_private(true)
            .exclude_mail(true)
            .retry_wait_time(Duration::from_secs(30))
            .max_retries(5u8)
            .accepted(Some(accepted_statuses))
            .build();
        let client = configured.client()?;

        Ok(Self { client, verbose })
    }

    /// Checks every link found in the markdown files returned by `get_md_files`.
    ///
    /// Links are checked one after another on a freshly created tokio runtime.
    /// Each successful link is printed as it is checked; failed links are
    /// listed once all links have been processed.
    ///
    /// # Errors
    ///
    /// Returns an error if the runtime cannot be created, if collecting or
    /// checking a link fails outright, or if at least one link is reported as
    /// a failure.
    pub(crate) fn lint(&self) -> Result<()> {
        if self.verbose {
            println!("Checking links in documentation");
        }

        // One lychee input per markdown file, explicitly hinted as markdown.
        let md_files = get_md_files();
        let mut inputs: Vec<Input> = Vec::with_capacity(md_files.len());
        for md_file in &md_files {
            inputs.push(Input {
                source: InputSource::FsPath(PathBuf::from(md_file)),
                file_type_hint: Some(FileType::Markdown),
                excluded_paths: None,
            });
        }

        let runtime = Runtime::new()?;

        // The async block is `move`, so it gets its own copy of the client.
        let checker = self.client.clone();

        runtime.block_on(async move {
            let links = Collector::new(None)
                .collect_links(inputs)
                .await
                .collect::<LycheeResult<Vec<_>>>()
                .await?;

            let total = links.len();
            let mut broken: Vec<Uri> = Vec::new();

            for link in links {
                let response = checker.check(link).await?;
                let status = response.status();
                if status.is_failure() {
                    broken.push(response.1.uri.clone());
                } else if status.is_success() {
                    println!("[✓] {}", response.1.uri.as_str());
                }
            }

            println!("{} links checked.", total);

            if broken.is_empty() {
                Ok(())
            } else {
                for uri in broken {
                    println!("[x] {}", uri.as_str());
                }

                Err(anyhow!("Some links in markdown documentation are down."))
            }
        })?;

        Ok(())
    }
}

/// Returns the markdown files found by walking the tree rooted at
/// `PKG_PROJECT_ROOT`.
#[cfg(not(windows))]
fn get_md_files() -> Vec<Utf8PathBuf> {
    let mut collected = vec![];
    let root: &str = PKG_PROJECT_ROOT.as_str();

    walk_dir(root, &mut collected);

    collected
}

/// Recursively collects every `.md` file under `base_dir` into `md_files`.
///
/// The walk is best-effort: directories that cannot be read, and entries whose
/// type, name, or path cannot be obtained as UTF-8, are silently skipped.
/// Directories named `node_modules`, `target`, `docs`, or `dev-docs`, and any
/// directory whose name starts with `.`, are not descended into.
#[cfg(not(windows))]
fn walk_dir(base_dir: &str, md_files: &mut Vec<Utf8PathBuf>) {
    let entries = match fs::read_dir(base_dir) {
        Ok(entries) => entries,
        // unreadable directory: nothing to collect here
        Err(_) => return,
    };

    for entry in entries.flatten() {
        let (file_type, name) = match (entry.file_type(), entry.file_name().into_string()) {
            (Ok(file_type), Ok(name)) => (file_type, name),
            _ => continue,
        };
        let entry_path = match Utf8PathBuf::try_from(entry.path()) {
            Ok(entry_path) => entry_path,
            Err(_) => continue,
        };

        if file_type.is_file() {
            if name.ends_with(".md") {
                md_files.push(entry_path);
            }
        } else if file_type.is_dir() {
            // we can't do much if a link is broken in node_modules (and it's big!)
            let skip = name == "node_modules"
                // we don't need to check the Rust compiler's output for broken links
                || name == "target"
                // the docs have their own link checker, no need to check twice
                || name == "docs"
                || name == "dev-docs"
                // also no need to recurse through hidden directories
                || name.starts_with('.');

            if !skip {
                // Recurse with the entry's full path: the bare directory name
                // would be resolved against the current working directory
                // rather than against `base_dir`.
                walk_dir(entry_path.as_str(), md_files);
            }
        }
    }
}
3 changes: 3 additions & 0 deletions xtask/src/tools/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
mod cargo;
mod git;
mod lychee;
mod make;
mod npm;
mod runner;
mod strip;

pub(crate) use cargo::CargoRunner;
pub(crate) use git::GitRunner;
#[cfg(not(windows))]
pub(crate) use lychee::LycheeRunner;
pub(crate) use make::MakeRunner;
pub(crate) use npm::NpmRunner;
pub(crate) use runner::Runner;
Expand Down
60 changes: 1 addition & 59 deletions xtask/src/tools/npm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use saucer::{anyhow, Context, Result};
use saucer::{Fs, Utf8PathBuf};
use which::which;

use std::{convert::TryFrom, fs, str};
use std::{fs, str};

use crate::info;
use crate::{
Expand Down Expand Up @@ -102,23 +102,6 @@ impl NpmRunner {
self.npm_exec(&["install"], &self.rover_client_lint_directory)?;
self.npm_exec(&["run", "lint"], &self.rover_client_lint_directory)?;

let files = get_md_files();

for file in files {
self.npm_exec(
&[
"exec",
"--yes",
"--",
"markdown-link-check",
file.as_str(),
"--config=mlc_config.json",
"-v",
],
&PKG_PROJECT_ROOT,
)?;
}

Ok(())
}

Expand Down Expand Up @@ -223,44 +206,3 @@ fn assert_publish_includes(output: &CommandOutput) -> Result<()> {
))
}
}

/// Returns the markdown files found by walking the tree rooted at
/// `PKG_PROJECT_ROOT`.
fn get_md_files() -> Vec<Utf8PathBuf> {
    let mut collected = vec![];
    let root: &str = PKG_PROJECT_ROOT.as_str();

    walk_dir(root, &mut collected);

    collected
}

/// Recursively collects every `.md` file under `base_dir` into `md_files`,
/// except files whose name contains `CHANGELOG`.
///
/// The walk is best-effort: directories that cannot be read, and entries whose
/// type, name, or path cannot be obtained as UTF-8, are silently skipped.
/// Directories named `node_modules`, `target`, or `docs`, and any directory
/// whose name starts with `.`, are not descended into.
fn walk_dir(base_dir: &str, md_files: &mut Vec<Utf8PathBuf>) {
    let entries = match fs::read_dir(base_dir) {
        Ok(entries) => entries,
        // unreadable directory: nothing to collect here
        Err(_) => return,
    };

    for entry in entries.flatten() {
        let (file_type, name) = match (entry.file_type(), entry.file_name().into_string()) {
            (Ok(file_type), Ok(name)) => (file_type, name),
            _ => continue,
        };
        let entry_path = match Utf8PathBuf::try_from(entry.path()) {
            Ok(entry_path) => entry_path,
            Err(_) => continue,
        };

        if file_type.is_file() {
            // the CHANGELOG is simply too large to be running this check on every PR
            if name.ends_with(".md") && !name.contains("CHANGELOG") {
                md_files.push(entry_path);
            }
        } else if file_type.is_dir() {
            // we can't do much if a link is broken in node_modules (and it's big!)
            let skip = name == "node_modules"
                // we don't need to check the Rust compiler's output for broken links
                || name == "target"
                // the docs have their own link checker, no need to check twice
                || name == "docs"
                // also no need to recurse through hidden directories
                || name.starts_with('.');

            if !skip {
                // Recurse with the entry's full path: the bare directory name
                // would be resolved against the current working directory
                // rather than against `base_dir`.
                walk_dir(entry_path.as_str(), md_files);
            }
        }
    }
}

0 comments on commit 803d45f

Please sign in to comment.