https://github.com/jackdewinter/pymarkdown/issues/1267 #1279

Merged: 13 commits, Dec 8, 2024
Changes from 11 commits
5 changes: 5 additions & 0 deletions newdocs/src/changelog.md
@@ -24,6 +24,11 @@
- Parsing of the FCB in certain cases was off, as was the text token
containing the code block's text. This resulted in the reported
columns being indented less than expected.
- [Issue 1274](https://github.com/jackdewinter/pymarkdown/issues/1274)
- Fixed remaining assert issues, leaving fixes that produce valid
Markdown, but not the intended Markdown.

suggestion: The description of Issue 1274's fix could be clearer

Consider rewording to clarify what was fixed and what remains to be addressed. For example: 'Fixed assert issues while preserving Markdown validity, though some cases may still not produce the exact intended Markdown output.'

Suggested change
- Fixed remaining assert issues, leaving fixes that produce valid
Markdown, but not the intended Markdown.
- Fixed assert issues while preserving Markdown validity, though some cases may still not produce the exact intended Markdown output.

- [Issue 1267](https://github.com/jackdewinter/pymarkdown/issues/1267)
- Fixed reported issue with task lists creating an error in Md018

<!--- pyml disable-next-line no-duplicate-heading-->
### Changed
8 changes: 4 additions & 4 deletions publish/coverage.json
@@ -2,12 +2,12 @@
"projectName": "pymarkdown",
"reportSource": "pytest",
"branchLevel": {
"totalMeasured": 5489,
"totalCovered": 5489
"totalMeasured": 5503,
"totalCovered": 5503
},
"lineLevel": {
"totalMeasured": 21362,
"totalCovered": 21362
"totalMeasured": 21393,
"totalCovered": 21393
}
}

8 changes: 4 additions & 4 deletions publish/test-results.json
@@ -1228,7 +1228,7 @@
},
{
"name": "test.rules.test_md018",
"totalTests": 29,
"totalTests": 30,
"failedTests": 0,
"errorTests": 0,
"skippedTests": 0,
@@ -1367,7 +1367,7 @@
"totalTests": 653,
"failedTests": 0,
"errorTests": 0,
"skippedTests": 61,
"skippedTests": 56,
"elapsedTimeInMilliseconds": 0
},
{
@@ -1620,10 +1620,10 @@
},
{
"name": "test.test_markdown_extra",
"totalTests": 263,
"totalTests": 271,
"failedTests": 0,
"errorTests": 0,
"skippedTests": 3,
"skippedTests": 1,
"elapsedTimeInMilliseconds": 0
},
{
62 changes: 30 additions & 32 deletions pymarkdown/container_blocks/container_helper.py
@@ -39,10 +39,14 @@ def __reduce_containers_if_required_bq_list(
whitespace_prefix = ""
# list_indent_level = None
if parser_state.token_stack[-1].is_list:
# leading_space_length = (
# len(extracted_whitespace) + position_marker.index_indent
# )
assert parser_state.original_line_to_parse is not None
leading_space_length = parser_state.original_line_to_parse.index(
position_marker.text_to_parse
) + len(extracted_whitespace)
search_index = len(parser_state.token_stack)
leading_space_length = (
len(extracted_whitespace) + position_marker.index_indent
)
while parser_state.token_stack[search_index - 1].is_list:
list_token = cast(
ListStackToken, parser_state.token_stack[search_index - 1]
@@ -69,14 +73,8 @@ def __reduce_containers_if_required_bq_list(
new_tokens.extend(container_level_tokens)

indent_delta = list_token.indent_level - position_marker.index_indent
if len(extracted_whitespace) > indent_delta:
whitespace_prefix = extracted_whitespace[:indent_delta]
extracted_whitespace = extracted_whitespace[indent_delta:]

# Covered by test_extra_044mcz3, currently disabled.
else:
whitespace_prefix = extracted_whitespace
extracted_whitespace = ""
whitespace_prefix = extracted_whitespace[:indent_delta]
extracted_whitespace = extracted_whitespace[indent_delta:]
return did_once, extracted_whitespace, whitespace_prefix

@staticmethod
@@ -182,28 +180,28 @@ def __reduce_containers_if_required_bq(
def __handle_whitespace_prefix(
parser_state: ParserState, whitespace_prefix: str, last_newline_part: str
) -> Optional[str]:
new_whitespace_prefix = None
if whitespace_prefix:
indent_level = 0
stack_index = len(parser_state.token_stack) - 1
while stack_index > 0:
if parser_state.token_stack[stack_index].is_list:
indent_level += cast(
ListStartMarkdownToken,
parser_state.token_stack[stack_index].matching_markdown_token,
).indent_level
break
bleading_spaces = cast(
BlockQuoteMarkdownToken,
if not whitespace_prefix:
return None

indent_level = 0
stack_index = len(parser_state.token_stack) - 1
while stack_index > 0:
if parser_state.token_stack[stack_index].is_list:
indent_level += cast(
ListStartMarkdownToken,
parser_state.token_stack[stack_index].matching_markdown_token,
).bleading_spaces
assert bleading_spaces is not None
split_bleading_spaces = bleading_spaces.split("\n")
last_split_bleading_spaces = len(split_bleading_spaces[-1])
indent_level += last_split_bleading_spaces
stack_index -= 1
new_whitespace_prefix = last_newline_part[indent_level:]
return new_whitespace_prefix
).indent_level
break
bleading_spaces = cast(
BlockQuoteMarkdownToken,
parser_state.token_stack[stack_index].matching_markdown_token,
).bleading_spaces
assert bleading_spaces is not None
split_bleading_spaces = bleading_spaces.split("\n")
last_split_bleading_spaces = len(split_bleading_spaces[-1])
indent_level += last_split_bleading_spaces
stack_index -= 1
return last_newline_part[indent_level:]

# pylint: disable=too-many-arguments
@staticmethod
48 changes: 29 additions & 19 deletions pymarkdown/inline/inline_processor.py
@@ -40,6 +40,31 @@ def initialize(extension_manager: ExtensionManager) -> None:
"""
InlineHandlerHelper.initialize(extension_manager)

@staticmethod
def __parse_inline_tracker_start(
lsi_tracker: LeadingSpaceIndexTracker, token: MarkdownToken
) -> None:
if token.is_block_quote_start or token.is_list_start:
lsi_tracker.open_container(token)
# if token.is_block_quote_end or token.is_list_end:
assert not (token.is_block_quote_end or token.is_list_end)
# lsi_tracker.register_container_end(token)
# elif
lsi_tracker.track_since_last_non_end_token(token)

@staticmethod
def __parse_inline_tracker_per_token(
lsi_tracker: LeadingSpaceIndexTracker, token: MarkdownToken
) -> None:
if not token.is_end_token or token.is_end_of_stream:
while lsi_tracker.have_any_registered_container_ends():
lsi_tracker.process_container_end(token)
if token.is_block_quote_start or token.is_list_start:
lsi_tracker.open_container(token)
elif token.is_block_quote_end or token.is_list_end:
lsi_tracker.register_container_end(token)
lsi_tracker.track_since_last_non_end_token(token)

@staticmethod
def parse_inline(
coalesced_results: List[MarkdownToken],
@@ -72,27 +97,13 @@ def parse_inline(
else:
POGGER.info("-->not bq-")

lsi_tracker = LeadingSpaceIndexTracker()
token = coalesced_results[0]
if token.is_block_quote_start or token.is_list_start:
lsi_tracker.open_container(token)
# if token.is_block_quote_end or token.is_list_end:
assert not (token.is_block_quote_end or token.is_list_end)
# lsi_tracker.register_container_end(token)
# elif
lsi_tracker.track_since_last_non_end_token(token)
lsi_tracker = LeadingSpaceIndexTracker()
InlineProcessor.__parse_inline_tracker_start(lsi_tracker, token)
for coalesce_index in range(1, len(coalesced_results)):

token = coalesced_results[coalesce_index]

if not token.is_end_token or token.is_end_of_stream:
while lsi_tracker.have_any_registered_container_ends():
lsi_tracker.process_container_end(token)
if token.is_block_quote_start or token.is_list_start:
lsi_tracker.open_container(token)
elif token.is_block_quote_end or token.is_list_end:
lsi_tracker.register_container_end(token)
lsi_tracker.track_since_last_non_end_token(token)
InlineProcessor.__parse_inline_tracker_per_token(lsi_tracker, token)

InlineProcessor.__process_next_coalesce_item(
coalesced_results,
@@ -404,8 +415,7 @@ def __parse_code_block(
leading_whitespace = ParserHelper.remove_all_from_text(
leading_whitespace
)
# POGGER.info("leading_whitespace:$<", leading_whitespace)
new_column_number += len(leading_whitespace)
new_column_number = 1 + len(leading_whitespace)
else:
line_number_delta, new_column_number = (
0,
2 changes: 2 additions & 0 deletions pymarkdown/leaf_blocks/thematic_leaf_block_processor.py
@@ -189,6 +189,8 @@ def __handle_special_case(
"__handle_special_case>>list_token>>$",
inner_list_markdown_token,
)
if leading_space_to_move:
leading_space_to_move += ParserLogger.blah_sequence
inner_list_markdown_token.add_leading_spaces(leading_space_to_move)
POGGER.debug(
"__handle_special_case>>list_token>>$",
1 change: 1 addition & 0 deletions pymarkdown/plugins/rule_md_018.py
@@ -144,6 +144,7 @@ def __next_token_paragraph_non_text_inline(self, token: MarkdownToken) -> None:
token.is_inline_emphasis
or token.is_inline_emphasis_end
or token.is_inline_autolink
or token.is_task_list
)
self.__delayed_line = None

49 changes: 24 additions & 25 deletions pymarkdown/plugins/rule_md_031.py
@@ -613,33 +613,32 @@ def __calc_3(
)
found_block_quote_token = None
assert last_stack_token_index >= 0
assert (
token_at_index := self.__leading_space_index_tracker.get_container_stack_item(
token_at_index = (
self.__leading_space_index_tracker.get_container_stack_item(
last_stack_token_index
)
).is_block_quote_start
found_block_quote_token = token_at_index
# while last_stack_token_index >= 0:
# if (
# token_at_index := self.__leading_space_index_tracker.get_container_stack_item(
# last_stack_token_index
# )
# ).is_block_quote_start:
# found_block_quote_token = token_at_index
# break
# last_stack_token_index -= 1

# assert was if
assert (
found_block_quote_token
and len(self.__x1) == 2
and self.__x1[0].is_list_start
and self.__x1[1].is_block_quote_start
)
did_process_removals = upgrade_kludge = (
self.__apply_tailing_block_quote_fix(1, context)
)
# endif
if token_at_index.is_block_quote_start: # pragma: no cover
found_block_quote_token = token_at_index
# while last_stack_token_index >= 0:
# if (
# token_at_index := self.__leading_space_index_tracker.get_container_stack_item(
# last_stack_token_index
# )
# ).is_block_quote_start:
# found_block_quote_token = token_at_index
# break
# last_stack_token_index -= 1

if (
found_block_quote_token
and len(self.__x1) == 2
and self.__x1[0].is_list_start
and self.__x1[1].is_block_quote_start
):
did_process_removals = upgrade_kludge = (
self.__apply_tailing_block_quote_fix(1, context)
)
return did_process_removals, upgrade_kludge

def __apply_tailing_block_quote_fix(
@@ -727,7 +726,7 @@ def __handle_fenced_code_block(
and not self.__last_non_end_token.is_blank_line
and can_trigger
):
if context.in_fix_mode:
if context.in_fix_mode and not context.is_during_line_pass:
self.__fix_spacing(context, token, special_case)
else:
self.report_next_token_error(context, token)
2 changes: 1 addition & 1 deletion pymarkdown/tokens/list_start_markdown_token.py
@@ -179,7 +179,7 @@ def remove_last_leading_space(self) -> Optional[str]:
extracted_text = self.__leading_spaces
self.__leading_spaces = None
else:
extracted_text = self.__leading_spaces[last_separator_index:]
extracted_text = self.__leading_spaces[last_separator_index + 1 :]

issue: Add bounds check for last_separator_index before adding 1

If last_separator_index is -1, adding 1 will still result in an invalid index. Consider adding a bounds check or handling this edge case explicitly.
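
As a minimal illustration of the guard being requested, here is a standalone sketch; the helper name and signature are hypothetical and are not the pymarkdown implementation, it only shows handling a missing "\n" separator explicitly before slicing with last_separator_index + 1:

```python
from typing import Optional, Tuple


def split_off_last_leading_space(
    leading_spaces: Optional[str],
) -> Tuple[Optional[str], Optional[str]]:
    """Return (extracted_last_part, remaining_leading_spaces)."""
    if leading_spaces is None:
        return None, None
    last_separator_index = leading_spaces.rfind("\n")
    if last_separator_index < 0:
        # No separator present: the whole value is the last (and only) part.
        return leading_spaces, None
    # The separator exists, so last_separator_index + 1 is a valid start index.
    return (
        leading_spaces[last_separator_index + 1 :],
        leading_spaces[:last_separator_index],
    )


print(split_off_last_leading_space("  \n    "))  # ('    ', '  ')
print(split_off_last_leading_space("    "))      # ('    ', None)
```

With this shape, the no-separator case is handled by an explicit branch instead of relying on the slice arithmetic staying in range.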

self.__leading_spaces = self.__leading_spaces[:last_separator_index]
self.__compose_extra_data_field()
return extracted_text
23 changes: 16 additions & 7 deletions pymarkdown/tokens/markdown_token.py
@@ -33,10 +33,18 @@ class MarkdownToken:
extra_data_separator = ":"

_end_token_prefix = "end-"

_token_pragma = "pragma"
_token_task_list = "task-list"
_token_end_of_stream = "end-of-stream"

_token_task_list = "task-list"
_token_front_matter = "front-matter"

_token_block_quote = "block-quote"
_token_unordered_list_start = "ulist"
_token_ordered_list_start = "olist"
_token_new_list_item = "li"

_token_paragraph = "para"
_token_blank_line = "BLANK"
_token_atx_heading = "atx"
@@ -46,13 +54,7 @@ class MarkdownToken:
_token_html_block = "html-block"
_token_fenced_code_block = "fcode-block"
_token_indented_code_block = "icode-block"
_token_block_quote = "block-quote"
_token_text = "text"
_token_front_matter = "front-matter"

_token_unordered_list_start = "ulist"
_token_ordered_list_start = "olist"
_token_new_list_item = "li"

_token_inline_code_span = "icode-span"
_token_inline_hard_break = "hard-break"
@@ -380,6 +382,13 @@ def is_front_matter(self) -> bool:
"""
return self.token_name == MarkdownToken._token_front_matter

@property
def is_task_list(self) -> bool:
"""
Returns whether the current token is the task list element.
"""
return self.token_name == MarkdownToken._token_task_list

@property
def is_text(self) -> bool:
"""