Skip to content

Commit

Permalink
Add skip_missing flag, add Input enum
Browse files Browse the repository at this point in the history
  • Loading branch information
pawroman committed Nov 24, 2020
1 parent 8025a2e commit 0b8b089
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 10 deletions.
25 changes: 20 additions & 5 deletions src/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,32 @@ use std::path::Path;
use std::{collections::HashSet, fmt::Display};
use url::Url;

/// The content formats this crate tells apart when handling an input.
///
/// A value is obtained from a path via the `From<P: AsRef<Path>>` impl in
/// this module, which looks at the file extension.
#[derive(Clone, Debug)]
pub(crate) enum FileType {
/// Chosen for the extensions `html` and `htm`.
HTML,
/// Chosen for the extension `md`.
Markdown,
/// Fallback for every other extension and for paths without one.
Plaintext,
}

/// A link target, in one of two forms.
///
/// Derives `PartialEq`, `Eq` and `Hash`, so values can be compared and stored
/// in hashed collections.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) enum Uri {
/// A target held as a parsed `Url`.
Website(Url),
/// A target held as a plain string.
/// NOTE(review): presumably an e-mail address, judging by the variant
/// name — confirm against the code that constructs it.
Mail(String),
}

#[derive(Clone, Debug)]
pub(crate) enum FileType {
HTML,
Markdown,
Plaintext,
impl<P: AsRef<Path>> From<P> for FileType {
    /// Detect if the given path points to a Markdown, HTML, or plaintext file.
    ///
    /// Only the file extension is inspected, and the comparison is
    /// case-sensitive:
    ///
    /// * `md` maps to [`FileType::Markdown`]
    /// * `html` and `htm` map to [`FileType::HTML`]
    /// * anything else maps to [`FileType::Plaintext`] — this includes paths
    ///   without an extension and extensions that are not valid UTF-8
    ///
    /// This conversion never panics.
    fn from(p: P) -> FileType {
        // `OsStr::to_str` yields `None` for non-UTF-8 data. Such an extension
        // cannot match any known one, so it falls through to plaintext
        // instead of aborting the program.
        match p.as_ref().extension().and_then(|ext| ext.to_str()) {
            Some("md") => FileType::Markdown,
            Some("html") | Some("htm") => FileType::HTML,
            _ => FileType::Plaintext,
        }
    }
}

impl Uri {
Expand Down
5 changes: 4 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,10 @@ fn main() -> Result<()> {
}
None => tokio::runtime::Runtime::new()?,
};
let errorcode = runtime.block_on(run(cfg, opts.inputs))?;
let errorcode = runtime.block_on(run(
cfg,
opts.inputs.iter().map(|i| i.to_string()).collect(),
))?;
std::process::exit(errorcode);
}

Expand Down
12 changes: 10 additions & 2 deletions src/options.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use crate::types::Input;
use anyhow::{Error, Result};
use serde::Deserialize;
use std::{fs, io::ErrorKind};
use structopt::StructOpt;
use url::Url;

const USER_AGENT: &str = "curl/7.71.1";
const METHOD: &str = "get";
Expand Down Expand Up @@ -35,8 +37,9 @@ macro_rules! fold_in {
about = "A boring link checker for my projects (and maybe yours)"
)]
pub(crate) struct LycheeOptions {
/// Input files
pub inputs: Vec<String>,
/// TODO: Inputs
#[structopt(parse(from_str = Input::from))]
pub inputs: Vec<Input>,

/// Configuration file to use
#[structopt(short, long = "config", default_value = "./lychee.toml")]
Expand All @@ -53,6 +56,11 @@ pub(crate) struct Config {
#[serde(default)]
pub verbose: bool,

/// TODO: Skip missing input files
#[structopt(long)]
#[serde(default)]
pub skip_missing: bool,

/// Show progress
#[structopt(short, long)]
#[serde(default)]
Expand Down
119 changes: 117 additions & 2 deletions src/types.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
use crate::options::Config;
use anyhow::anyhow;
use anyhow::{anyhow, Result};
use std::{collections::HashSet, convert::TryFrom};

use crate::extract::FileType;
use glob::glob;
use regex::RegexSet;
use std::path::{Path, PathBuf};
use tokio::fs::read_to_string;
use tokio::io::{stdin, AsyncReadExt};
use url::Url;

const STDIN: &str = "-";

/// Specifies how requests to websites will be made
pub(crate) enum RequestMethod {
Expand Down Expand Up @@ -68,7 +76,7 @@ impl From<reqwest::Error> for Status {
}

/// Exclude configuration for the link checker.
/// You can ignore links based on
/// You can ignore links based on regex patterns or pre-defined IP ranges.
pub(crate) struct Excludes {
pub regex: Option<RegexSet>,
/// Example: 192.168.0.1
Expand Down Expand Up @@ -105,3 +113,110 @@ impl Default for Excludes {
}
}
}

/// A source of content, as named by the user.
///
/// Values are created from a string via `From<&str>`; `get_contents` turns a
/// value into the text it refers to.
#[derive(Debug)]
#[non_exhaustive]
pub(crate) enum Input {
/// A string that `Url::parse` accepted; its content is fetched with `reqwest`.
RemoteUrl(Url),
/// A glob pattern, kept as the original string. Any string that is neither
/// `-` nor a parseable URL ends up here; it is expanded with `glob`.
FsGlob(String),
/// A single filesystem path. `From<&str>` does not produce this variant;
/// `path_content` creates it for each file it reads.
FsPath(PathBuf),
/// Standard input, selected by the string `-` (the `STDIN` constant).
Stdin,
}

/// Renders an input back into the textual form it is written in.
///
/// `Display` is implemented rather than `ToString` directly: the standard
/// library's blanket `impl<T: Display> ToString for T` keeps
/// `input.to_string()` working for existing callers, and the value also
/// becomes usable with `{}` in format strings.
impl std::fmt::Display for Input {
    /// Writes the textual form of this input:
    ///
    /// * `RemoteUrl` — the URL's own string representation
    /// * `FsGlob` — the pattern, unchanged
    /// * `FsPath` — the path; bytes that are not valid UTF-8 are replaced
    ///   lossily instead of collapsing the whole path into an empty string
    /// * `Stdin` — the `STDIN` marker
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::RemoteUrl(url) => write!(f, "{}", url),
            Self::FsGlob(pattern) => f.write_str(pattern),
            // `Path::display` performs a lossy conversion, so a non-UTF-8
            // path still yields a recognisable, non-empty string.
            Self::FsPath(path) => write!(f, "{}", path.display()),
            Self::Stdin => f.write_str(STDIN),
        }
    }
}

/// The text obtained from one input, together with where it came from and
/// the format it was detected as.
#[derive(Debug)]
pub(crate) struct InputContent {
/// The input this content was read from.
input: Input,
/// The format assigned to the content; stdin is always `Plaintext`, the
/// other sources derive it through `FileType::from`.
file_type: FileType,
/// The complete text that was read.
content: String,
}

impl From<&str> for Input {
    /// Classifies a raw argument string.
    ///
    /// The `STDIN` marker selects [`Input::Stdin`]. Otherwise the string is
    /// tried as a URL; if it parses it becomes [`Input::RemoteUrl`], and if
    /// parsing fails it is kept verbatim as an [`Input::FsGlob`] pattern.
    fn from(value: &str) -> Self {
        // Guard clause: the stdin marker is never tried as a URL.
        if value == STDIN {
            return Self::Stdin;
        }

        Url::parse(value).map_or_else(
            |_| Self::FsGlob(value.to_owned()),
            Self::RemoteUrl,
        )
    }
}

impl Input {
async fn get_contents(self) -> Result<Vec<InputContent>> {
use Input::*;

let contents = match self {
RemoteUrl(url) => vec![Self::url_contents(url).await?],
FsGlob(path_glob) => Self::glob_contents(path_glob).await?,
FsPath(path) => vec![Self::path_content(&path).await?],
Stdin => vec![Self::stdin_content().await?],
};

Ok(contents)
}

async fn url_contents(url: Url) -> Result<InputContent> {
let res = reqwest::get(url.clone()).await?;
let content = res.text().await?;
let input_content = InputContent {
file_type: FileType::from(&url.as_str()),
input: Input::RemoteUrl(url),
content,
};

Ok(input_content)
}

async fn glob_contents(path_glob: String) -> Result<Vec<InputContent>> {
let mut contents = vec![];

for entry in glob(&path_glob)? {
match entry {
Ok(path) => {
let content = Self::path_content(&path).await?;
contents.push(content);
}
Err(e) => println!("{:?}", e),
}
}

Ok(contents)
}

async fn path_content<P: Into<PathBuf> + AsRef<Path>>(path: P) -> Result<InputContent> {
let input_content = InputContent {
file_type: FileType::from(path.as_ref()),
content: read_to_string(&path).await?,
input: Input::FsPath(path.into()),
};

Ok(input_content)
}

async fn stdin_content() -> Result<InputContent> {
let mut content = String::new();
let mut stdin = stdin();
stdin.read_to_string(&mut content).await?;

let input_content = InputContent {
input: Input::Stdin,
content,
file_type: FileType::Plaintext,
};

Ok(input_content)
}
}

0 comments on commit 0b8b089

Please sign in to comment.