Skip to content

Commit

Permalink
cleaning up
Browse files Browse the repository at this point in the history
  • Loading branch information
davidsbatista committed Dec 9, 2024
1 parent 3cdc2df commit d834b39
Showing 1 changed file with 0 additions and 28 deletions.
28 changes: 0 additions & 28 deletions test/components/preprocessors/test_document_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,31 +753,3 @@ def test_run_split_by_word_respect_sentence_boundary_with_split_overlap_and_page
assert documents[5].meta["page_number"] == 3
assert documents[5].meta["split_id"] == 5
assert documents[5].meta["split_idx_start"] == text.index(documents[5].content)


# TODO: move to SentenceSplitter tests
# class TestSentenceSplitter:
# def test_apply_split_rules_second_while_loop(self) -> None:
# text = "This is a test. (With a parenthetical statement.) And another sentence."
# spans = [(0, 15), (16, 50), (51, 74)]
# result = SentenceSplitter._apply_split_rules(text, spans)
# assert len(result) == 2
# assert result == [(0, 50), (51, 74)]
#
# def test_apply_split_rules_no_join(self) -> None:
# text = "This is a test. This is another test. And a third test."
# spans = [(0, 15), (16, 36), (37, 54)]
# result = SentenceSplitter._apply_split_rules(text, spans)
# assert len(result) == 3
# assert result == [(0, 15), (16, 36), (37, 54)]
#
# @pytest.mark.parametrize(
# "text,span,next_span,quote_spans,expected",
# [
# # triggers the case where the sentence boundary is inside a quote
# ('He said, "Hello World." Then left.', (0, 15), (16, 23), [(9, 23)], True)
# ],
# )
# def test_needs_join_cases(self, text, span, next_span, quote_spans, expected):
# result = SentenceSplitter._needs_join(text, span, next_span, quote_spans)
# assert result == expected, f"Expected {expected} for input: {text}, {span}, {next_span}, {quote_spans}"

0 comments on commit d834b39

Please sign in to comment.