Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add check_code_block_token_count function #65

Merged
merged 2 commits into from
Jan 25, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 159 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::{
fmt::{self, Write},
};

use pulldown_cmark::{Alignment as TableAlignment, Event, HeadingLevel, LinkType};
use pulldown_cmark::{Alignment as TableAlignment, Event, HeadingLevel, LinkType, Tag};

/// Similar to [Pulldown-Cmark-Alignment][Alignment], but with required
/// traits for comparison to allow testing.
Expand Down Expand Up @@ -76,6 +76,8 @@ pub struct Options<'a> {
pub newlines_after_list: usize,
pub newlines_after_blockquote: usize,
pub newlines_after_rest: usize,
/// Token count for fenced code block. An appropriate value of this field can be decided by
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now I see why it was called token count, not backtick count: `~` can be used instead of backticks.

/// [`check_code_block_token_count`].
pub code_block_token_count: usize,
pub code_block_token: char,
pub list_token: char,
Expand All @@ -85,6 +87,8 @@ pub struct Options<'a> {
pub strong_token: &'a str,
}

/// Default number of fence characters used for a fenced code block.
///
/// This is the value of `code_block_token_count` in `DEFAULT_OPTIONS`, and it is also the
/// smallest value that `check_code_block_token_count` returns.
const DEFAULT_CODE_BLOCK_TOKEN_COUNT: usize = 4;

const DEFAULT_OPTIONS: Options<'_> = Options {
newlines_after_headline: 2,
newlines_after_paragraph: 2,
Expand All @@ -94,7 +98,7 @@ const DEFAULT_OPTIONS: Options<'_> = Options {
newlines_after_list: 2,
newlines_after_blockquote: 2,
newlines_after_rest: 1,
code_block_token_count: 4,
code_block_token_count: DEFAULT_CODE_BLOCK_TOKEN_COUNT,
code_block_token: '`',
list_token: '*',
ordered_list_token: '.',
Expand Down Expand Up @@ -663,6 +667,85 @@ where
cmark_with_options(events, &mut formatter, Default::default())
}

/// Compute an appropriate token count for fenced code blocks.
///
/// The text of every code block in `events` is scanned for the longest run of one
/// repeated fence character (either `` ` `` or `~`). The returned count is one more than
/// that run, but never less than the default of 4, so a fence of that length cannot be
/// matched by a run of fence characters found inside the code itself.
///
/// Runs are tracked across consecutive [`Event::Text`] events, because a single piece of
/// code block text may be delivered as several `Text` events.
///
/// `events` may be anything that implements [`IntoIterator`], for example an iterator or
/// a reference to a slice of events.
///
/// This can be used to decide a value for `code_block_token_count` field of [`Options`].
///
/// ```rust
/// use pulldown_cmark::Event;
/// use pulldown_cmark_to_cmark::*;
///
/// let events = &[Event::Text("text".into())];
/// let code_block_token_count = check_code_block_token_count(events.iter());
/// let options = Options {
///     code_block_token_count,
///     ..Default::default()
/// };
/// let mut buf = String::new();
/// cmark_with_options(events.iter(), &mut buf, options);
/// ```
pub fn check_code_block_token_count<'a, I, E>(events: I) -> usize
where
    I: IntoIterator<Item = E>,
    E: Borrow<Event<'a>>,
{
    let mut in_codeblock = false;
    let mut max_token_count = 0;

    // The current run length is carried over from one Text event to the next,
    // because a continuous text may be split into several Text events.
    let mut token_count = 0;

    // The fence character seen immediately before the current position, if any.
    // `None` means the previous character (or event) interrupted the run.
    let mut last_token_char: Option<char> = None;

    for event in events {
        match event.borrow() {
            Event::Start(Tag::CodeBlock(_)) => {
                in_codeblock = true;
                last_token_char = None;
            }
            Event::End(Tag::CodeBlock(_)) => {
                in_codeblock = false;
                last_token_char = None;
            }
            Event::Text(text) if in_codeblock => {
                for c in text.chars() {
                    match c {
                        '`' | '~' => {
                            if last_token_char == Some(c) {
                                token_count += 1;
                            } else {
                                // A new run starts: record the finished one first.
                                max_token_count = max_token_count.max(token_count);
                                token_count = 1;
                            }
                            last_token_char = Some(c);
                        }
                        _ => last_token_char = None,
                    }
                }
            }
            _ => {
                // Any other event breaks a run of fence characters.
                last_token_char = None;
            }
        }
    }
    // Account for a run that was still open when the events ended.
    max_token_count = max_token_count.max(token_count);

    if max_token_count < DEFAULT_CODE_BLOCK_TOKEN_COUNT {
        DEFAULT_CODE_BLOCK_TOKEN_COUNT
    } else {
        // If there are consecutive tokens in a code block, the fence must be longer
        // than the longest run.
        max_token_count + 1
    }
}

fn count_consecutive_backticks(text: &str) -> usize {
let mut in_backticks = false;
let mut max_backticks = 0;
Expand All @@ -681,6 +764,80 @@ fn count_consecutive_backticks(text: &str) -> usize {
max_backticks.max(cur_backticks)
}

#[cfg(test)]
mod check_code_block_token_count {
    use super::check_code_block_token_count;
    use pulldown_cmark::{CodeBlockKind, CowStr, Event, Tag};

    /// Tag of a fenced code block with an empty info string.
    fn fenced<'a>() -> Tag<'a> {
        Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::Borrowed("")))
    }

    /// Wraps `chunks` (one `Text` event per chunk) in a fenced code block and returns
    /// the token count computed for the resulting event stream.
    fn count_for(chunks: &[&str]) -> usize {
        let mut events = vec![Event::Start(fenced())];
        events.extend(chunks.iter().map(|&chunk| Event::Text(chunk.into())));
        events.push(Event::End(fenced()));
        check_code_block_token_count(events.iter())
    }

    #[test]
    fn no_token() {
        assert_eq!(count_for(&["text"]), 4);
    }

    #[test]
    fn backtick() {
        assert_eq!(count_for(&["```"]), 4);
        assert_eq!(count_for(&["````"]), 5);
        assert_eq!(count_for(&["``````````"]), 11);
    }

    #[test]
    fn tilde() {
        assert_eq!(count_for(&["~~~"]), 4);
        assert_eq!(count_for(&["~~~~"]), 5);
        assert_eq!(count_for(&["~~~~~~~~~~"]), 11);
    }

    #[test]
    fn mix() {
        assert_eq!(count_for(&["```~~~~"]), 5);
        assert_eq!(count_for(&["~~~~`````~~"]), 6);
        assert_eq!(count_for(&["~~~```````~~~```~~"]), 8);
    }

    #[test]
    fn splitted_text() {
        // Runs of fence characters continue across adjacent Text events.
        assert_eq!(count_for(&["~~~", "~~~"]), 7);
        assert_eq!(count_for(&["````", "````"]), 9);
    }
}

#[cfg(test)]
mod count_consecutive_backticks {
use super::count_consecutive_backticks;
Expand Down
Loading