Skip to content

Commit

Permalink
Replace regex-based parser for URL lines with open-coded one.
Browse files Browse the repository at this point in the history
  • Loading branch information
zackw committed Feb 13, 2017
1 parent 5817351 commit ff4758c
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 25 deletions.
2 changes: 0 additions & 2 deletions src/tools/tidy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,3 @@ version = "0.1.0"
authors = ["Alex Crichton <[email protected]>"]

[dependencies]
regex = "*"
lazy_static = "*"
3 changes: 0 additions & 3 deletions src/tools/tidy/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
//! etc. This is run by default on `make check` and as part of the auto
//! builders.

extern crate regex;
#[macro_use] extern crate lazy_static;

use std::fs;
use std::path::{PathBuf, Path};
use std::env;
Expand Down
66 changes: 46 additions & 20 deletions src/tools/tidy/src/style.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@ use std::fs::File;
use std::io::prelude::*;
use std::path::Path;

use regex::Regex;

const COLS: usize = 100;
const LICENSE: &'static str = "\
Copyright <year> The Rust Project Developers. See the COPYRIGHT
Expand All @@ -40,26 +38,54 @@ http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
option. This file may not be copied, modified, or distributed
except according to those terms.";

/// True if LINE is allowed to be longer than the normal limit.
///
/// Currently there is only one exception: if the line is within a
/// comment, and its entire text is one URL (possibly with a Markdown
/// link label in front), then it's allowed to be overlength. This is
/// because Markdown offers no way to split a line in the middle of a
/// URL, and the length of URLs for external references is beyond our
/// control.
fn long_line_is_ok(line: &str) -> bool {
lazy_static! {
static ref URL_RE: Regex = Regex::new(
// This regexp uses the CommonMark definition of link
// label. It thinks any sequence of nonwhitespace
// characters beginning with "http://" or "https://" is a
// URL. Add more schemas as necessary.
r"^\s*//[!/]?\s+(?:\[(?:[^\]\\]|\\.){1,999}\]:\s+)?https?://\S+$"
).unwrap();
/// Parser states for `line_is_url`.
///
/// Each variant names what the parser expects to see next while it walks
/// the whitespace-separated tokens of a line.
#[derive(Clone, Copy, PartialEq)]
// The variants are intentionally spelled in SCREAMING_CASE.
#[allow(non_camel_case_types)]
enum LIUState {
    /// Nothing consumed yet; the next token must be `//`, `///` or `//!`.
    EXP_COMMENT_START,
    /// Comment marker seen; the next token is a link label or the URL itself.
    EXP_LINK_LABEL_OR_URL,
    /// Link label seen; the next token must be the URL.
    EXP_URL,
    /// URL seen; no further tokens are allowed.
    EXP_END,
}

/// True if LINE appears to be a line comment containing a URL,
/// possibly with a Markdown link label in front, and nothing else.
/// The Markdown link label, if present, may not contain whitespace.
/// Lines of this form are allowed to be overlength, because Markdown
/// offers no way to split a line in the middle of a URL, and the lengths
/// of URLs to external references are beyond our control.
///
/// Any token starting with `http://` or `https://` counts as a URL.
/// A link label is a token of at least four bytes of the form `[...]:`.
fn line_is_url(line: &str) -> bool {
    use self::LIUState::*;

    let is_url = |w: &str| w.starts_with("http://") || w.starts_with("https://");
    let is_link_label = |w: &str| w.len() >= 4 && w.starts_with("[") && w.ends_with("]:");

    let mut state = EXP_COMMENT_START;
    for tok in line.split_whitespace() {
        state = match (state, tok) {
            (EXP_COMMENT_START, "//") |
            (EXP_COMMENT_START, "///") |
            (EXP_COMMENT_START, "//!") => EXP_LINK_LABEL_OR_URL,

            // The label check must come first: a label never looks like a URL,
            // and only one label is permitted before the URL.
            (EXP_LINK_LABEL_OR_URL, w) if is_link_label(w) => EXP_URL,

            (EXP_LINK_LABEL_OR_URL, w) | (EXP_URL, w) if is_url(w) => EXP_END,

            // Anything else, including any token after the URL, disqualifies
            // the line.
            _ => return false,
        };
    }

    // Only a line that ended right after the URL qualifies; this rejects
    // empty lines, bare comment markers, and labels with no URL.
    state == EXP_END
}

/// True if LINE is allowed to be longer than the normal limit.
/// Currently there is only one exception, for long URLs, but more
/// may be added in the future.
fn long_line_is_ok(line: &str) -> bool {
if line_is_url(line) {
return true;
}

Expand Down

0 comments on commit ff4758c

Please sign in to comment.