Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add check_code_block_token_count function #65

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 156 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{
fmt::{self, Write},
};

use pulldown_cmark::{Alignment as TableAlignment, Event, HeadingLevel, LinkType};
use pulldown_cmark::{Alignment as TableAlignment, Event, HeadingLevel, LinkType, Tag};

/// Similar to [Pulldown-Cmark-Alignment][Alignment], but with required
/// traits for comparison to allow testing.
Expand Down Expand Up @@ -76,6 +76,8 @@ pub struct Options<'a> {
pub newlines_after_list: usize,
pub newlines_after_blockquote: usize,
pub newlines_after_rest: usize,
/// Token count for fenced code block. An appropriate value of this field can be decided by
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I see why it was called token count, not backtick count - ~ can be used instead of backticks.

/// [`count_code_block_tokens()`].
pub code_block_token_count: usize,
pub code_block_token: char,
pub list_token: char,
Expand All @@ -85,6 +87,8 @@ pub struct Options<'a> {
pub strong_token: &'a str,
}

const DEFAULT_CODE_BLOCK_TOKEN_COUNT: usize = 4;

const DEFAULT_OPTIONS: Options<'_> = Options {
newlines_after_headline: 2,
newlines_after_paragraph: 2,
Expand All @@ -94,7 +98,7 @@ const DEFAULT_OPTIONS: Options<'_> = Options {
newlines_after_list: 2,
newlines_after_blockquote: 2,
newlines_after_rest: 1,
code_block_token_count: 4,
code_block_token_count: DEFAULT_CODE_BLOCK_TOKEN_COUNT,
code_block_token: '`',
list_token: '*',
ordered_list_token: '.',
Expand Down Expand Up @@ -663,6 +667,82 @@ where
cmark_with_options(events, &mut formatter, Default::default())
}

/// Obtain the appropriate token count for fenced code blocks.
///
/// Use this function to obtain the correct value for `code_block_token_count` field of [`Options`].
///
/// ```rust
/// use pulldown_cmark::Event;
/// use pulldown_cmark_to_cmark::*;
///
/// let events = &[Event::Text("text".into())];
/// let code_block_token_count = count_code_block_tokens(events);
/// let options = Options {
/// code_block_token_count,
/// ..Default::default()
/// };
/// let mut buf = String::new();
/// cmark_with_options(events.iter(), &mut buf, options);
/// ```
pub fn count_code_block_tokens<'a, I, E>(events: I) -> usize
where
I: IntoIterator<Item = E>,
E: Borrow<Event<'a>>,
{
let mut in_codeblock = false;
let mut max_token_count = 0;

// token_count should be taken over Text events
// because a continuous text may be splitted to some Text events.
let mut token_count = 0;
let mut last_token_char = ' ';
for event in events {
match event.borrow() {
Event::Start(Tag::CodeBlock(_)) => {
in_codeblock = true;
last_token_char = ' ';
}
Event::End(Tag::CodeBlock(_)) => {
in_codeblock = false;
last_token_char = ' ';
}
Event::Text(x) if in_codeblock => {
for c in x.chars() {
if c == '`' {
if last_token_char == '`' {
token_count += 1;
} else {
max_token_count = max_token_count.max(token_count);
token_count = 1;
}
last_token_char = '`';
} else if c == '~' {
if last_token_char == '~' {
token_count += 1;
} else {
max_token_count = max_token_count.max(token_count);
token_count = 1;
}
last_token_char = '~';
} else {
last_token_char = ' ';
}
}
}
_ => {
last_token_char = ' ';
}
}
}
max_token_count = max_token_count.max(token_count);
if max_token_count < DEFAULT_CODE_BLOCK_TOKEN_COUNT {
DEFAULT_CODE_BLOCK_TOKEN_COUNT
} else {
// If there are consecutive tokens in codeblock, codeblock token should be extended.
max_token_count + 1
}
}

fn count_consecutive_backticks(text: &str) -> usize {
let mut in_backticks = false;
let mut max_backticks = 0;
Expand All @@ -681,6 +761,80 @@ fn count_consecutive_backticks(text: &str) -> usize {
max_backticks.max(cur_backticks)
}

#[cfg(test)]
mod count_code_block_tokens {
use super::count_code_block_tokens;
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag};

const CODE_BLOCK_START: Event<'_> = Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed(""))));
const CODE_BLOCK_END: Event<'_> = Event::End(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed(""))));

#[test]
fn no_token() {
let events = &[CODE_BLOCK_START, Event::Text("text".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 4);
}

#[test]
fn backtick() {
let events = &[CODE_BLOCK_START, Event::Text("```".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 4);

let events = &[CODE_BLOCK_START, Event::Text("````".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 5);

let events = &[CODE_BLOCK_START, Event::Text("``````````".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 11);
}

#[test]
fn tilde() {
let events = &[CODE_BLOCK_START, Event::Text("~~~".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 4);

let events = &[CODE_BLOCK_START, Event::Text("~~~~".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 5);

let events = &[CODE_BLOCK_START, Event::Text("~~~~~~~~~~".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 11);
}

#[test]
fn mix() {
let events = &[CODE_BLOCK_START, Event::Text("```~~~~".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 5);

let events = &[CODE_BLOCK_START, Event::Text("~~~~`````~~".into()), CODE_BLOCK_END];
assert_eq!(count_code_block_tokens(events.iter()), 6);

let events = &[
CODE_BLOCK_START,
Event::Text("~~~```````~~~```~~".into()),
CODE_BLOCK_END,
];
assert_eq!(count_code_block_tokens(events.iter()), 8);
}

#[test]
fn splitted_text() {
let events = &[
CODE_BLOCK_START,
Event::Text("~~~".into()),
Event::Text("~~~".into()),
CODE_BLOCK_END,
];
assert_eq!(count_code_block_tokens(events.iter()), 7);

let events = &[
CODE_BLOCK_START,
Event::Text("````".into()),
Event::Text("````".into()),
CODE_BLOCK_END,
];
assert_eq!(count_code_block_tokens(events.iter()), 9);
}
}

#[cfg(test)]
mod count_consecutive_backticks {
use super::count_consecutive_backticks;
Expand Down
Loading