Skip to content

Commit

Permalink
fix: more test fixes
Browse files: browse the repository at this point in the history
  • Loading branch information
scanny committed Dec 18, 2024
1 parent 2795137 commit 7160d2d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 15 deletions.
4 changes: 2 additions & 2 deletions test_unstructured/partition/test_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def fake_iter_document_elements(self: _DocxPartitioner) -> Iterator[Element]:

EXPECTED_EMAIL_OUTPUT = [
NarrativeText(text="This is a test email to use for unit tests."),
Title(text="Important points:"),
Text(text="Important points:"),
ListItem(text="Roses are red"),
ListItem(text="Violets are blue"),
]
Expand Down Expand Up @@ -440,7 +440,7 @@ def test_partition_md_from_url_works_with_embedded_html():
def test_auto_partition_msg_from_filename():
assert partition(example_doc_path("fake-email.msg"), strategy=PartitionStrategy.HI_RES) == [
NarrativeText(text="This is a test email to use for unit tests."),
Title(text="Important points:"),
Text(text="Important points:"),
ListItem(text="Roses are red"),
ListItem(text="Violets are blue"),
]
Expand Down
10 changes: 5 additions & 5 deletions test_unstructured/partition/test_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

EXPECTED_OUTPUT = [
NarrativeText(text="This is a test email to use for unit tests."),
Title(text="Important points:"),
Text(text="Important points:"),
ListItem(text="Roses are red"),
ListItem(text="Violets are blue"),
]
Expand Down Expand Up @@ -88,9 +88,9 @@ def test_extract_email_from_text_plain_matches_elements_extracted_from_text_html
elements_from_text = partition_email(file_path, content_source="text/plain")
elements_from_html = partition_email(file_path, content_source="text/html")

assert elements_from_text == EXPECTED_OUTPUT
assert all(e.text == eo.text for e, eo in zip(elements_from_text, EXPECTED_OUTPUT))
assert elements_from_html == EXPECTED_OUTPUT
assert elements_from_html == elements_from_text
assert all(eh.text == et.text for eh, et in zip(elements_from_html, elements_from_text))


def test_partition_email_round_trips_via_json():
Expand Down Expand Up @@ -354,14 +354,14 @@ def test_partition_email_can_process_attachments():
)

assert elements == [
Title("Hello!"),
Text("Hello!"),
NarrativeText("Here's the attachments!"),
NarrativeText("It includes:"),
ListItem("Lots of whitespace"),
ListItem("Little to no content"),
ListItem("and is a quick read"),
Text("Best,"),
Title("Mallori"),
Text("Mallori"),
NarrativeText("Hey this is a fake attachment!"),
]
assert all(e.metadata.last_modified == "2022-12-23T18:08:48+00:00" for e in elements)
Expand Down
15 changes: 7 additions & 8 deletions test_unstructured/partition/test_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,13 @@
ListItem,
NarrativeText,
Text,
Title,
)
from unstructured.partition.common import UnsupportedFileFormatError
from unstructured.partition.msg import MsgPartitionerOptions, partition_msg

EXPECTED_MSG_OUTPUT = [
NarrativeText(text="This is a test email to use for unit tests."),
Title(text="Important points:"),
Text(text="Important points:"),
ListItem(text="Roses are red"),
ListItem(text="Violets are blue"),
]
Expand Down Expand Up @@ -138,9 +137,9 @@ def test_partition_msg_can_process_attachments():
assert [type(e).__name__ for e in elements][:10] == [
"NarrativeText",
"Text",
"Title",
"Title",
"Title",
"Text",
"Text",
"Text",
"Image",
"Title",
"Text",
Expand Down Expand Up @@ -175,9 +174,9 @@ def test_partition_msg_silently_skips_attachments_it_cannot_partition(request: F
# -- the email body is partitioned --
NarrativeText("Here are those documents."),
Text("--"),
Title("Mallori Harrell"),
Title("Unstructured Technologies"),
Title("Data Scientist"),
Text("Mallori Harrell"),
Text("Unstructured Technologies"),
Text("Data Scientist"),
# -- no elements appear for the attachment(s) --
]

Expand Down

0 comments on commit 7160d2d

Please sign in to comment.