-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
WIP: Improve MathJax Support #626
Changes from 14 commits
2f89760
643edbe
7d0a1f1
16e22e1
7657044
b4f77eb
784b814
4aa0df1
534dfb4
2a05964
92259f1
e5be09d
121f638
ea4f998
a55e640
1b0abf7
e68bee5
1648126
2512b87
1871b87
92b5fad
629c778
7540a3e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,261 @@ | ||
//! Preprocessor that converts mathematical expressions into MathJax. | ||
//! | ||
//! This preprocessor takes inline expressions wrapped in `$`-pairs and block | ||
//! expressions wrapped in `$$`-pairs and transforms them into valid MathJax | ||
//! expressions that do not interfere with the markdown parser. | ||
|
||
use errors::Result; | ||
use regex::{CaptureMatches, Captures, Regex}; | ||
|
||
use super::{Preprocessor, PreprocessorContext}; | ||
use book::{Book, BookItem}; | ||
|
||
/// A preprocessor for expanding `$`- and `$$`-pairs into valid MathJax expressions.
///
/// The type carries no state, so `MathJaxPreprocessor::new()` and
/// `MathJaxPreprocessor::default()` are interchangeable.
#[derive(Debug, Default)]
pub struct MathJaxPreprocessor;

impl MathJaxPreprocessor {
    /// Create a `MathJaxPreprocessor`.
    pub fn new() -> Self {
        MathJaxPreprocessor
    }
}
|
||
impl Preprocessor for MathJaxPreprocessor { | ||
fn name(&self) -> &str { | ||
"mathjax" | ||
} | ||
|
||
fn run(&self, _ctx: &PreprocessorContext, book: &mut Book) -> Result<()> { | ||
book.for_each_mut(|section: &mut BookItem| { | ||
if let BookItem::Chapter(ref mut chapter) = *section { | ||
let content = replace_all_mathematics(&chapter.content); | ||
chapter.content = content; | ||
} | ||
}); | ||
|
||
Ok(()) | ||
} | ||
} | ||
|
||
fn replace_all_mathematics(content: &str) -> String { | ||
let mut previous_end_index = 0; | ||
let mut replaced = String::new(); | ||
|
||
for math in find_mathematics(content) { | ||
replaced.push_str(&content[previous_end_index..math.start_index]); | ||
replaced.push_str(&math.replacement()); | ||
previous_end_index = math.end_index; | ||
} | ||
|
||
replaced.push_str(&content[previous_end_index..]); | ||
|
||
replaced | ||
} | ||
|
||
fn find_mathematics(content: &str) -> MathematicsIterator { | ||
lazy_static! { | ||
static ref REGEXP: Regex = Regex::new(r"(?x) # insignificant whitespace mode | ||
# Mathematics is | ||
|
||
# Block mathematics is | ||
(\$\$) # a double dollar sign | ||
([^$]+) # followed by one or more things other than a dollar sign | ||
(\$\$) # followed by a closing double dollar sign. | ||
|
||
| # or | ||
|
||
# Inline mathematics is | ||
(\$) # a dollar sign | ||
([^$]+) # followed by one or more things other than a dollar sign | ||
(\$) # followed by a closing dollar sign. | ||
|
||
| # or | ||
|
||
# Legacy inline mathematics | ||
(\\\\\() # An escaped opening bracket `\\(` | ||
([^)]+) # followed by one or more other things TODO provide correct regexp. | ||
(\\\\\)) # followed by a closing bracket `\\)` | ||
|
||
| # or | ||
|
||
# Legacy block mathematics | ||
(\\\\\[) # An escaped opening bracket `\\[` | ||
([^$]+) # followed by one ore more other things TODO provide correct regexp. | ||
(\\\\\]) # followed by a closing bracket `\\]` | ||
").unwrap(); | ||
} | ||
MathematicsIterator(REGEXP.captures_iter(content)) | ||
} | ||
|
||
struct MathematicsIterator<'a>(CaptureMatches<'a, 'a>); | ||
|
||
impl<'a> Iterator for MathematicsIterator<'a> { | ||
type Item = Mathematics<'a>; | ||
|
||
fn next(&mut self) -> Option<Self::Item> { | ||
for capture in &mut self.0 { | ||
if let mathematics @ Some(_) = Mathematics::from_capture(capture) { | ||
return mathematics; | ||
} | ||
} | ||
None | ||
} | ||
} | ||
|
||
/// A single mathematical expression located inside a piece of text. | ||
#[derive(Debug, PartialEq, Eq)] | ||
struct Mathematics<'a> { | ||
/// Byte offset at which the whole match, opening delimiter included, starts. | ||
start_index: usize, | ||
/// Byte offset just past the end of the whole match, closing delimiter included. | ||
end_index: usize, | ||
/// Which notation the expression was written in. | ||
kind: Kind, | ||
/// The expression itself, with the surrounding delimiters stripped. | ||
text: &'a str, | ||
} | ||
|
||
/// The notation a mathematical expression was written in. | ||
#[derive(Debug, PartialEq, Eq, Clone, Copy)] | ||
enum Kind { | ||
/// Expression wrapped in single dollar signs: `$…$`. | ||
Inline, | ||
/// Expression wrapped in double dollar signs: `$$…$$`. | ||
Block, | ||
/// Expression wrapped in the legacy delimiters `\\(` and `\\)`. | ||
LegacyInline, | ||
/// Expression wrapped in the legacy delimiters `\\[` and `\\]`. | ||
LegacyBlock, | ||
} | ||
|
||
impl<'a> Mathematics<'a> { | ||
fn from_capture(captures: Captures<'a>) -> Option<Self> { | ||
let kind = | ||
captures.get(1).or(captures.get(4)).or(captures.get(7)).or(captures.get(10)) | ||
.map(|delimiter| | ||
match delimiter.as_str() { | ||
"$$" => Kind::Block, | ||
"$" => Kind::Inline, | ||
"\\\\[" => Kind::LegacyBlock, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You can always use a raw string here, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That is a very good suggestion. I wasn't aware of raw strings. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✔️ |
||
_ => Kind::LegacyInline, | ||
}) | ||
.expect("captured mathematics should have opening delimiter at the provided indices"); | ||
|
||
captures.get(0).map(|m| Mathematics { | ||
start_index: m.start(), | ||
end_index: m.end(), | ||
kind: kind, | ||
text: kind.text(m.as_str()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For mathjax to work properly it needs the surrounding dollar signs. I strip them here so we can put them in when we do text replacement. This way we can treat the intended and the legacy case the same. ✔️ |
||
}) | ||
} | ||
|
||
fn replacement(&self) -> String { | ||
let mut replacement = String::new(); | ||
match self.kind { | ||
Kind::Block | Kind::LegacyBlock => { | ||
replacement.push_str("<div class=\"math\">$$"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These format!(r#"<div class="math">$${}$$</div>"#, self.text) ... Or do you think the incremental string building is more readable? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I think I prefer the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✔️ |
||
replacement.push_str(self.text); | ||
replacement.push_str("$$</div>"); | ||
}, | ||
Kind::Inline | Kind::LegacyInline => { | ||
replacement.push_str("<span class=\"inline math\">$"); | ||
replacement.push_str(self.text); | ||
replacement.push_str("$</span>"); | ||
}, | ||
} | ||
replacement | ||
} | ||
} | ||
|
||
impl Kind { | ||
fn text<'a>(&self, delimited_text: &'a str) -> &'a str { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have thought this method belongs on the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I placed it on There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I opted for a auxiliary function with the same functionality. ✔️ |
||
let end = delimited_text.len(); | ||
match *self { | ||
Kind::Block => &delimited_text[2..end-2], | ||
Kind::Inline => &delimited_text[1..end-1], | ||
Kind::LegacyBlock => &delimited_text[3..end-3], | ||
Kind::LegacyInline => &delimited_text[3..end-3], | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// Collects everything `find_mathematics` reports for `content`.
    fn found_in(content: &str) -> Vec<Mathematics> {
        find_mathematics(content).collect()
    }

    #[test]
    fn should_find_no_mathematics_in_regular_text() {
        assert!(found_in("Text without mathematics").is_empty());
    }

    #[test]
    fn should_find_inline_mathematics() {
        let found = found_in("Pythagorean theorem: $a^{2} + b^{2} = c^{2}$");

        let expected = vec![Mathematics {
            start_index: 21,
            end_index: 44,
            kind: Kind::Inline,
            text: "a^{2} + b^{2} = c^{2}",
        }];
        assert_eq!(found, expected);
    }

    #[test]
    fn should_find_block_mathematics() {
        let found = found_in(r"Euler's identity: $$e^{i\pi} + 1 = 0$$");

        let expected = vec![Mathematics {
            start_index: 18,
            end_index: 38,
            kind: Kind::Block,
            text: r"e^{i\pi} + 1 = 0",
        }];
        assert_eq!(found, expected);
    }

    #[test]
    fn should_find_legacy_inline_mathematics() {
        let found = found_in(r"Pythagorean theorem: \\(a^{2} + b^{2} = c^{2}\\)");

        let expected = vec![Mathematics {
            start_index: 21,
            end_index: 48,
            kind: Kind::LegacyInline,
            text: "a^{2} + b^{2} = c^{2}",
        }];
        assert_eq!(found, expected);
    }

    #[test]
    fn should_find_legacy_block_mathematics() {
        let found = found_in(r"Euler's identity: \\[e^{i\pi} + 1 = 0\\]");

        let expected = vec![Mathematics {
            start_index: 18,
            end_index: 40,
            kind: Kind::LegacyBlock,
            text: r"e^{i\pi} + 1 = 0",
        }];
        assert_eq!(found, expected);
    }

    #[test]
    fn should_replace_inline_mathematics() {
        let replaced = replace_all_mathematics("Pythagorean theorem: $a^{2} + b^{2} = c^{2}$");

        assert_eq!(
            replaced,
            r#"Pythagorean theorem: <span class="inline math">$a^{2} + b^{2} = c^{2}$</span>"#
        );
    }

    #[test]
    fn should_replace_block_mathematics() {
        let replaced = replace_all_mathematics(r"Euler's identity: $$e^{i\pi} + 1 = 0$$");

        assert_eq!(
            replaced,
            r#"Euler's identity: <div class="math">$$e^{i\pi} + 1 = 0$$</div>"#
        );
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for taking the time to split this regex up and spell out what it's doing! Regular expressions are usually unreadable, so when you mentioned you weren't too confident with the regex I was a little afraid of what I'd see 😜
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It has been said that regular expressions are a write-only tool. I am pretty confident with them, but I know for a fact that it currently is incorrect.