Skip to content

Commit

Permalink
renumber footnotes
Browse files Browse the repository at this point in the history
Add a footnote transformer, which just assigns sequential numbers to
footnotes starting at `1`.

A note on `drain_pending_footnotes`: in this change, we transform the
`&'md String`s into full `String`s during the drain, so that we can
easily use them as a sort key. We could have done something more clever,
like creating an enum that captures either the `&'md String` or the
`usize` of the transformation, and then compares those; but that felt
like more effort than we really need, especially given that the current
implementation of `FootnoteTransformer::write` eventually String-ifies
the `usize` anyway; this just does it ahead of time. In practice, we
only expect to see a handful of footnotes, and each of them will only be
a handful of chars long (even 10,000 footnotes would only be 5 chars
long at their longest!)

Resolves #89.
  • Loading branch information
yshavit authored Aug 25, 2024
1 parent 285d71c commit 8da4a62
Show file tree
Hide file tree
Showing 10 changed files with 242 additions and 6 deletions.
3 changes: 3 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ pub struct Cli {
#[arg(long, short, value_enum, default_value_t=LinkTransform::Reference)]
pub(crate) link_format: LinkTransform,

#[arg(long, default_value_t = true, action = clap::ArgAction::Set)]
pub(crate) renumber_footnotes: bool,

/// Output the results as a JSON object, instead of as markdown.
#[arg(long, short, default_value_t = OutputFormat::Markdown)]
pub(crate) output: OutputFormat,
Expand Down
38 changes: 36 additions & 2 deletions src/fmt_md.rs
Original file line number Diff line number Diff line change
Expand Up @@ -425,11 +425,11 @@ impl<'s, 'md> MdWriterState<'s, 'md> {
}
if matches!(which, DefinitionsToWrite::Footnotes | DefinitionsToWrite::Both) {
let mut defs_to_write: Vec<_> = self.inlines_writer.drain_pending_footnotes();
defs_to_write.sort_by_key(|&kv| kv.0);
defs_to_write.sort_unstable_by(|a, b| (&a.0).cmp(&b.0));

for (link_ref, text) in defs_to_write {
out.write_str("[^");
out.write_str(link_ref);
out.write_str(&link_ref);
out.write_str("]: ");
out.with_block(Block::Inlined(2), |out| {
self.write_md(out, Self::doc_iter(text), false);
Expand Down Expand Up @@ -1783,6 +1783,40 @@ pub mod tests {
-----"#},
)
}

/// Smoke test: with `renumber_footnotes` enabled, the footnote labeled `a` is rendered as `1`
/// in both the reference and the definition.
///
/// See [crate::footnote_transform::test] for more extensive tests of the renumbering itself.
/// TODO(review): the original note said those tests still needed to be added; that module has
/// `inactive`, `active` and `active_with_ten_footnotes` -- confirm whether more are wanted.
#[test]
fn footnote_transform_smoke_test() {
check_render_refs_with(
&MdOptions::new_with(|mdo| mdo.inline_options.renumber_footnotes = true),
vec![MdElemRef::Paragraph(&footnote_a_in_paragraph())],
indoc! {r#"
[^1]
[^1]: the footnote text"#},
);
}

/// Smoke test: with `renumber_footnotes` disabled, the footnote keeps its original label `a`
/// in both the reference and the definition.
#[test]
fn footnote_no_transform_smoke_test() {
check_render_refs_with(
&MdOptions::new_with(|mdo| mdo.inline_options.renumber_footnotes = false),
vec![MdElemRef::Paragraph(&footnote_a_in_paragraph())],
indoc! {r#"
[^a]
[^a]: the footnote text"#},
);
}

/// Test fixture: a paragraph whose body is a single footnote labeled `a`, with its text built
/// by `md_elems!("the footnote text")`.
fn footnote_a_in_paragraph() -> Paragraph {
Paragraph {
body: vec![Inline::Footnote(Footnote {
label: "a".to_string(),
text: md_elems!("the footnote text"),
})],
}
}
}

mod annotation_and_footnote_layouts {
Expand Down
20 changes: 17 additions & 3 deletions src/fmt_md_inlines.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::footnote_transform::FootnoteTransformer;
use crate::link_transform::{LinkLabel, LinkTransform, LinkTransformation, LinkTransformer};
use crate::output::{Output, SimpleWrite};
use crate::tree::{
Expand All @@ -12,13 +13,15 @@ use std::collections::{HashMap, HashSet};
/// Options that configure an `MdInlinesWriter`.
#[derive(Debug, Copy, Clone)]
pub struct MdInlinesWriterOptions {
/// Link rendering mode; the writer builds its `LinkTransformer` from this value.
pub link_format: LinkTransform,
/// Whether footnote renumbering is active; passed to `FootnoteTransformer::new`.
pub renumber_footnotes: bool,
}

/// Writes inline elements, queuing link and footnote definitions so they can be drained and
/// written out later.
pub struct MdInlinesWriter<'md> {
seen_links: HashSet<LinkLabel<'md>>,
/// Labels of footnotes already written; a footnote is queued in `pending_references` only the
/// first time its label is inserted here.
seen_footnotes: HashSet<&'md String>,
/// Definitions queued until `drain_pending_links` / `drain_pending_footnotes` is called.
pending_references: PendingReferences<'md>,
link_transformer: LinkTransformer,
/// Writes footnote labels, built from the `renumber_footnotes` option.
footnote_transformer: FootnoteTransformer<'md>,
}

struct PendingReferences<'md> {
Expand Down Expand Up @@ -76,6 +79,7 @@ impl<'md> MdInlinesWriter<'md> {
seen_footnotes: HashSet::with_capacity(pending_refs_capacity),
pending_references: PendingReferences::with_capacity(pending_refs_capacity),
link_transformer: LinkTransformer::from(options.link_format),
footnote_transformer: FootnoteTransformer::new(options.renumber_footnotes),
}
}

Expand All @@ -99,8 +103,15 @@ impl<'md> MdInlinesWriter<'md> {
self.pending_references.links.drain().collect()
}

pub fn drain_pending_footnotes(&mut self) -> Vec<(&'md String, &'md Vec<MdElem>)> {
self.pending_references.footnotes.drain().collect()
/// Removes every pending footnote and returns `(label, text)` pairs, where each label has been
/// passed through the footnote transformer and turned into an owned `String`.
///
/// The pairs come back in whatever order the pending collection's `drain` yields them; callers
/// that need a stable order must sort the result themselves.
pub fn drain_pending_footnotes(&mut self) -> Vec<(String, &'md Vec<MdElem>)> {
    let mut to_stringer = self.footnote_transformer.new_to_stringer();
    self.pending_references
        .footnotes
        .drain()
        .map(|(label, text)| (to_stringer.transform(label), text))
        .collect()
}

pub fn write_line<I, W>(&mut self, out: &mut Output<W>, elems: I)
Expand Down Expand Up @@ -157,7 +168,7 @@ impl<'md> MdInlinesWriter<'md> {
Inline::Image(image) => self.write_linklike(out, image),
Inline::Footnote(Footnote { label, text }) => {
out.write_str("[^");
out.write_str(label);
self.footnote_transformer.write(out, label);
out.write_char(']');
if self.seen_footnotes.insert(label) {
self.pending_references.footnotes.insert(label, text);
Expand Down Expand Up @@ -477,6 +488,7 @@ mod tests {
let mut output = Output::new(String::new());
let mut writer = MdInlinesWriter::new(MdInlinesWriterOptions {
link_format: LinkTransform::Keep,
renumber_footnotes: false,
});
let link = Inline::Link(Link {
text: vec![Inline::Text(Text {
Expand Down Expand Up @@ -520,6 +532,7 @@ mod tests {
let mut output = Output::new(String::new());
let mut writer = MdInlinesWriter::new(MdInlinesWriterOptions {
link_format: LinkTransform::Keep,
renumber_footnotes: false,
});
let link = Inline::Image(Image {
alt: input_description.to_string(),
Expand All @@ -544,6 +557,7 @@ mod tests {
let mut output = Output::new(String::new());
let mut writer = MdInlinesWriter::new(MdInlinesWriterOptions {
link_format: LinkTransform::Keep,
renumber_footnotes: false,
});
writer.write_inline_element(&mut output, &orig);
let md_str = output.take_underlying().unwrap();
Expand Down
132 changes: 132 additions & 0 deletions src/footnote_transform.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
use crate::output::{Output, SimpleWrite};
use std::collections::HashMap;

/// Writes footnote labels, optionally replacing each distinct label with a sequential number
/// starting at 1.
pub struct FootnoteTransformer<'md> {
/// `None` when renumbering is inactive (labels are written unchanged); otherwise maps each
/// original label to the number it was assigned the first time it was written.
mappings: Option<HashMap<&'md str, usize>>,
}

/// Helper that renders transformed footnote labels as owned `String`s; obtained from
/// `FootnoteTransformer::new_to_stringer`.
pub struct FootnoteTransformerToString<'a, 'md> {
/// The transformer that `transform` writes through (and whose mappings it may extend).
transformer: &'a mut FootnoteTransformer<'md>,
/// Scratch output that `transform` writes each label into before taking the resulting string.
scratch: Output<String>,
}

impl<'md> FootnoteTransformer<'md> {
    /// Creates a transformer.
    ///
    /// When `active` is `true`, labels are renumbered sequentially; when `false`, labels are
    /// written through unchanged and no mapping table is kept.
    pub fn new(active: bool) -> Self {
        Self {
            mappings: active.then(HashMap::default),
        }
    }

    /// Writes the (possibly transformed) form of `label` to `out`.
    ///
    /// If renumbering is inactive, `label` is written as-is. Otherwise the number assigned to
    /// `label` is written; a label seen for the first time is assigned the next sequential
    /// number (one more than the count of labels seen so far, so numbering starts at 1), and
    /// that assignment is remembered for later calls.
    pub fn write<W>(&mut self, out: &mut Output<W>, label: &'md str)
    where
        W: SimpleWrite,
    {
        let Some(mapping) = &mut self.mappings else {
            out.write_str(label);
            return;
        };
        // Compute the candidate number before taking the entry, since `entry` holds a mutable
        // borrow of the map.
        let next_number = mapping.len() + 1;
        let assigned = *mapping.entry(label).or_insert(next_number);
        out.write_str(&assigned.to_string());
    }

    /// Returns a helper that produces transformed labels as owned `String`s while borrowing
    /// this transformer mutably.
    pub fn new_to_stringer(&mut self) -> FootnoteTransformerToString<'_, 'md> {
        FootnoteTransformerToString::new(self)
    }
}

impl<'a, 'md> FootnoteTransformerToString<'a, 'md> {
    /// Returns the transformed form of `label` as an owned `String`.
    ///
    /// This goes through `FootnoteTransformer::write`, so a label that has not been seen yet
    /// is assigned its number here, exactly as if it had been written to the real output.
    ///
    /// # Panics
    ///
    /// Panics if replacing or taking the scratch output's underlying `String` returns an error.
    /// NOTE(review): presumably that cannot happen for a `String`-backed output -- confirm.
    pub fn transform(&mut self, label: &'md str) -> String {
        let capacity = self.transformed_label_len(label);
        // Swap in a right-sized buffer; whatever the scratch output held before is discarded.
        _ = self
            .scratch
            .replace_underlying(String::with_capacity(capacity))
            .unwrap();
        self.transformer.write(&mut self.scratch, label);
        self.scratch.take_underlying().unwrap()
    }

    /// Wraps `transformer` together with an empty scratch output.
    fn new(transformer: &'a mut FootnoteTransformer<'md>) -> Self {
        Self {
            transformer,
            scratch: Output::new(String::new()),
        }
    }

    /// Predicts how many bytes the transformed form of `label` will occupy, without assigning
    /// a number to it.
    ///
    /// With renumbering inactive this is `label.len()`. Otherwise it is the decimal digit count
    /// of the number the label already maps to, or of the number it would be assigned next.
    fn transformed_label_len(&self, label: &str) -> usize {
        match &self.transformer.mappings {
            None => label.len(),
            Some(mapping) => {
                let renumbered_to = mapping.get(label).copied().unwrap_or(mapping.len() + 1);
                // `checked_ilog10` is `None` only for 0; treat that as a single digit.
                let renumbered_log10 = renumbered_to.checked_ilog10().unwrap_or(0);
                // Try to convert the u32 to usize; if we can't, just guess a length of 3.
                // That should be plenty!
                usize::try_from(renumbered_log10 + 1).unwrap_or(3)
            }
        }
    }
}

#[cfg(test)]
mod test {
    use crate::footnote_transform::FootnoteTransformer;
    use crate::output::Output;

    /// An inactive transformer writes every label through unchanged.
    #[test]
    fn inactive() {
        let mut passthrough = FootnoteTransformer::new(false);
        check("abc", &mut passthrough, "abc", 3);
        check("1", &mut passthrough, "1", 1);
        check("3", &mut passthrough, "3", 1);

        // a repeated label still comes out unchanged
        check("1", &mut passthrough, "1", 1);
    }

    /// An active transformer numbers labels in first-seen order and remembers assignments.
    #[test]
    fn active() {
        let mut renumbering = FootnoteTransformer::new(true);
        check("abc", &mut renumbering, "1", 1);
        check("1", &mut renumbering, "2", 1);
        check("3", &mut renumbering, "3", 1);

        // a repeated label keeps the number it was first given
        check("1", &mut renumbering, "2", 1);
    }

    /// The tenth distinct label is the first to need two digits.
    #[test]
    fn active_with_ten_footnotes() {
        let mut renumbering = FootnoteTransformer::new(true);

        // Burn through nine labels; their rendered output is irrelevant here.
        let first_nine: Vec<_> = (1..10).map(|n| format!("footnote-{n}")).collect();
        for label in &first_nine {
            let mut discarded = Output::new(String::new());
            renumbering.write(&mut discarded, label);
        }

        // the tenth label should remap to "10" with an expected len of 2
        check("z", &mut renumbering, "10", 2);
    }

    /// Asserts that `input` is written as `expect`, and that the predicted transformed length
    /// equals `expect_transformed_len` both before and after the label is first written.
    fn check<'a>(
        input: &'a str,
        transformer: &mut FootnoteTransformer<'a>,
        expect: &str,
        expect_transformed_len: usize,
    ) {
        // The length prediction must hold before the label has officially been seen...
        let predicted_before = transformer.new_to_stringer().transformed_label_len(input);
        assert_eq!(predicted_before, expect_transformed_len);

        let mut sink = Output::new(String::with_capacity(expect.len()));
        transformer.write(&mut sink, input);
        let rendered = sink.take_underlying().unwrap();
        assert_eq!(&rendered, expect);

        // ...and again once it has been written.
        let predicted_after = transformer.new_to_stringer().transformed_label_len(input);
        assert_eq!(predicted_after, expect_transformed_len);
    }
}
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ pub mod cli;
mod fmt_md;
mod fmt_md_inlines;
mod fmt_str;
mod footnote_transform;
mod link_transform;
mod matcher;
mod output;
Expand Down Expand Up @@ -82,6 +83,7 @@ where
footnote_reference_placement: cli.footnote_pos.unwrap_or(cli.link_pos),
inline_options: MdInlinesWriterOptions {
link_format: cli.link_format,
renumber_footnotes: cli.renumber_footnotes,
},
};

Expand Down
3 changes: 3 additions & 0 deletions src/link_transform.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ impl<'md> LinkLabel<'md> {
LinkLabel::Inline(inlines) => {
let mut inline_writer = MdInlinesWriter::new(MdInlinesWriterOptions {
link_format: LinkTransform::Keep,
renumber_footnotes: false,
});
inlines_to_string(&mut inline_writer, *inlines)
}
Expand Down Expand Up @@ -419,6 +420,7 @@ mod tests {
let mut transformer = LinkTransformer::from(LinkTransform::Reference);
let mut iw = MdInlinesWriter::new(MdInlinesWriterOptions {
link_format: LinkTransform::Keep,
renumber_footnotes: false,
});

// [alpha](https://example.com) ==> [alpha][1]
Expand Down Expand Up @@ -505,6 +507,7 @@ mod tests {
let mut transformer = LinkTransformer::from(transform);
let mut iw = MdInlinesWriter::new(MdInlinesWriterOptions {
link_format: LinkTransform::Keep,
renumber_footnotes: false,
});
let link = Link {
text: vec![label],
Expand Down
5 changes: 5 additions & 0 deletions src/output.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ impl<W: SimpleWrite> Output<W> {
}
}

/// Flushes the current underlying stream, then installs `new` in its place and returns the
/// stream that was there before.
///
/// If the flush fails, its error is returned and the existing stream is left in place.
pub fn replace_underlying(&mut self, new: W) -> std::io::Result<W> {
    self.stream.flush()?;
    let previous = std::mem::replace(&mut self.stream, new);
    Ok(previous)
}

pub fn with_block<F>(&mut self, block: Block, action: F)
where
F: FnOnce(&mut Self),
Expand Down
4 changes: 3 additions & 1 deletion src/tree_ref_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub struct SerdeDoc<'md> {
#[serde(skip_serializing_if = "HashMap::is_empty")]
links: HashMap<Cow<'md, str>, UrlAndTitle<'md>>,
#[serde(skip_serializing_if = "HashMap::is_empty")]
footnotes: HashMap<&'md String, Vec<SerdeElem<'md>>>,
footnotes: HashMap<String, Vec<SerdeElem<'md>>>,
}

#[derive(Serialize)]
Expand Down Expand Up @@ -703,13 +703,15 @@ mod tests {
/// Checks an owned element against `expect`, keeping links as-is and leaving footnote labels
/// un-renumbered.
fn check(given: MdElem, expect: &str) {
    check_with(
        MdInlinesWriterOptions {
            link_format: LinkTransform::Keep,
            renumber_footnotes: false,
        },
        MdElemRef::from(&given),
        expect,
    );
}

/// Checks an element reference against `expect`, keeping links as-is and leaving footnote
/// labels un-renumbered.
fn check_md_ref(given: MdElemRef, expect: &str) {
    check_with(
        MdInlinesWriterOptions {
            link_format: LinkTransform::Keep,
            renumber_footnotes: false,
        },
        given,
        expect,
    );
}
Expand Down
1 change: 1 addition & 0 deletions src/utils_for_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ mod test_utils {
footnote_reference_placement: ReferencePlacement::default_for_tests(),
inline_options: MdInlinesWriterOptions {
link_format: LinkTransform::default_for_tests(),
renumber_footnotes: false,
},
}
}
Expand Down
Loading

0 comments on commit 8da4a62

Please sign in to comment.