From c0c5046d00e1d555e40e705aaacd6865c7ccf302 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Wed, 25 Sep 2024 18:20:27 -0500 Subject: [PATCH 1/3] Add support for backslash escape in wikilinks --- src/parser/inlines.rs | 71 +++++++++++++++++-- .../fixtures/wikilinks_title_after_pipe.md | 24 ++++++- .../fixtures/wikilinks_title_before_pipe.md | 24 ++++++- src/tests/wikilinks.rs | 26 +++++++ 4 files changed, 137 insertions(+), 8 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 23179799..4becf9b1 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1730,7 +1730,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { let startpos = self.pos; let component = self.wikilink_url_link_label()?; let url_clean = strings::clean_url(component.url); - let (link_label, link_label_start_column, link_label_end_column) = + let (link_label, link_label_start_column, _link_label_end_column) = match component.link_label { Some((label, sc, ec)) => (entity::unescape_html(label), sc, ec), None => ( @@ -1744,11 +1744,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { url: String::from_utf8(url_clean).unwrap(), }; let inl = self.make_inline(NodeValue::WikiLink(nl), startpos - 1, self.pos - 1); - inl.append(self.make_inline( - NodeValue::Text(String::from_utf8(link_label).unwrap()), - link_label_start_column, - link_label_end_column, - )); + + self.label_backslash_escapes(inl, link_label, link_label_start_column); Some(inl) } @@ -1844,6 +1841,68 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { true } + // Given a label, handles backslash escaped characters.
Appends the resulting + // nodes to the container + fn label_backslash_escapes( + &mut self, + container: &'a AstNode<'a>, + label: Vec<u8>, + start_column: usize, + ) { + let mut startpos = 0; + let mut offset = 0; + + while offset < label.len() { + let c = label[offset]; + + if c == b'\\' { + if ispunct(label[offset + 1]) { + let preceding_text = self.make_inline( + NodeValue::Text( + String::from_utf8(label[startpos..offset].to_owned()).unwrap(), + ), + start_column + startpos, + start_column + offset - 1, + ); + + container.append(preceding_text); + + let inline_text = self.make_inline( + NodeValue::Text(String::from_utf8(vec![label[offset + 1]]).unwrap()), + start_column + offset, + start_column + offset + 1, + ); + + if self.options.render.escaped_char_spans { + let span = self.make_inline( + NodeValue::Escaped, + start_column + offset, + start_column + offset + 1, + ); + + span.append(inline_text); + container.append(span); + } else { + container.append(inline_text); + } + + offset += 2; + startpos = offset; + } else { + offset += 1; + } + } else { + offset += 1; + } + } + + container.append(self.make_inline( + NodeValue::Text(String::from_utf8(label[startpos..offset].to_owned()).unwrap()), + start_column + startpos, + start_column + offset - 1, + )); + } + pub fn spnl(&mut self) { self.skip_spaces(); if self.skip_line_end() { diff --git a/src/tests/fixtures/wikilinks_title_after_pipe.md b/src/tests/fixtures/wikilinks_title_after_pipe.md index 7c645ccf..ba26db77 100644 --- a/src/tests/fixtures/wikilinks_title_after_pipe.md +++ b/src/tests/fixtures/wikilinks_title_after_pipe.md @@ -44,4 +44,26 @@ HTML entities are recognized both in the name of page and in the link title. [[Geschütztes Leerzeichen|Über &nbsp;]] .

Über &nbsp;

-```````````````````````````````` \ No newline at end of file +```````````````````````````````` + +Escaping characters is supported + +```````````````````````````````` example +[[https://example.org|foo\[\]bar]] +. +

foo[]bar

+```````````````````````````````` + +```````````````````````````````` example +[[Name \[of\] page]] +. +

Name [of] page

+```````````````````````````````` + +Emphasis or other inline markdown is not supported + +```````````````````````````````` example +[[Name _of_ page]] +. +

Name _of_ page

+```````````````````````````````` diff --git a/src/tests/fixtures/wikilinks_title_before_pipe.md b/src/tests/fixtures/wikilinks_title_before_pipe.md index e430380c..4e45c60f 100644 --- a/src/tests/fixtures/wikilinks_title_before_pipe.md +++ b/src/tests/fixtures/wikilinks_title_before_pipe.md @@ -52,4 +52,26 @@ HTML entities are recognized both in the name of page and in the link title. [[Über &nbsp;|Geschütztes Leerzeichen]] .

Über &nbsp;

-```````````````````````````````` \ No newline at end of file +```````````````````````````````` + +Escaping characters is supported + +```````````````````````````````` example +[[foo\[\]bar|https://example.org]] +. +

foo[]bar

+```````````````````````````````` + +```````````````````````````````` example +[[Name \[of\] page]] +. +

Name [of] page

+```````````````````````````````` + +Emphasis or other inline markdown is not supported + +```````````````````````````````` example +[[Name _of_ page]] +. +

Name _of_ page

+```````````````````````````````` diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index afcaa07e..039816e6 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -47,6 +47,16 @@ fn wikilinks_sanitizes_the_href_attribute_case_2() { ); } +#[test] +fn wikilinks_title_escape_chars() { + html_opts!( + [extension.wikilinks_title_before_pipe, render.escaped_char_spans], + concat!("[[Name \\[of\\] page|http://example.com]]",), + concat!("

Name [of] page

\n"), + no_roundtrip, + ); +} + #[test] fn wikilinks_supercedes_relaxed_autolinks() { html_opts!( @@ -228,4 +238,20 @@ fn sourcepos() { ]) ]) ); + + assert_ast_match!( + [extension.wikilinks_title_before_pipe], + "This [[link\\[label|http://example.com]] that\n", + (document (1:1-1:44) [ + (paragraph (1:1-1:44) [ + (text (1:1-1:5) "This ") + (wikilink (1:6-1:39) [ + (text (1:8-1:11) "link") + (text (1:12-1:13) "[") + (text (1:14-1:18) "label") + ]) + (text (1:40-1:44) " that") + ]) + ]) + ); } From 273abc6abf944f88c8d264948228b0888e88388d Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Wed, 25 Sep 2024 21:06:11 -0500 Subject: [PATCH 2/3] Fix string overrun --- src/parser/inlines.rs | 53 ++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 4becf9b1..90a0411e 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1851,46 +1851,41 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { ) { let mut startpos = 0; let mut offset = 0; + let len = label.len(); - while offset < label.len() { + while offset < len { let c = label[offset]; - if c == b'\\' { - if ispunct(label[offset + 1]) { - let preceding_text = self.make_inline( - NodeValue::Text( - String::from_utf8(label[startpos..offset].to_owned()).unwrap(), - ), - start_column + startpos, - start_column + offset - 1, - ); + if c == b'\\' && (offset + 1) < len && ispunct(label[offset + 1]) { + let preceding_text = self.make_inline( + NodeValue::Text(String::from_utf8(label[startpos..offset].to_owned()).unwrap()), + start_column + startpos, + start_column + offset - 1, + ); - container.append(preceding_text); + container.append(preceding_text); + + let inline_text = self.make_inline( + NodeValue::Text(String::from_utf8(vec![label[offset + 1]]).unwrap()), + start_column + offset, + start_column + offset + 1, + ); - let inline_text = self.make_inline( - 
NodeValue::Text(String::from_utf8(vec![label[offset + 1]]).unwrap()), + if self.options.render.escaped_char_spans { + let span = self.make_inline( + NodeValue::Escaped, start_column + offset, start_column + offset + 1, ); - if self.options.render.escaped_char_spans { - let span = self.make_inline( - NodeValue::Escaped, - start_column + offset, - start_column + offset + 1, - ); - - span.append(inline_text); - container.append(span); - } else { - container.append(inline_text); - } - - offset += 2; - startpos = offset; + span.append(inline_text); + container.append(span); } else { - offset += 1; + container.append(inline_text); } + + offset += 2; + startpos = offset; } else { offset += 1; } From 01bec6d5b41ca957dc72c5d9ec3a5d18540ef92b Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Fri, 27 Sep 2024 14:19:22 -0500 Subject: [PATCH 3/3] =?UTF-8?q?Ensure=20we=20don=E2=80=99t=20append=20a=20?= =?UTF-8?q?blank=20text=20node?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/parser/inlines.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 90a0411e..acea0ea0 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1891,11 +1891,13 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> { } } - container.append(self.make_inline( - NodeValue::Text(String::from_utf8(label[startpos..offset].to_owned()).unwrap()), - start_column + startpos, - start_column + offset - 1, - )); + if startpos != offset { + container.append(self.make_inline( + NodeValue::Text(String::from_utf8(label[startpos..offset].to_owned()).unwrap()), + start_column + startpos, + start_column + offset - 1, + )); + } } pub fn spnl(&mut self) {