diff --git a/test_unstructured/partition/test_auto.py b/test_unstructured/partition/test_auto.py index 74187aa3b0..815c603cd7 100644 --- a/test_unstructured/partition/test_auto.py +++ b/test_unstructured/partition/test_auto.py @@ -175,7 +175,7 @@ def fake_iter_document_elements(self: _DocxPartitioner) -> Iterator[Element]: EXPECTED_EMAIL_OUTPUT = [ NarrativeText(text="This is a test email to use for unit tests."), - Title(text="Important points:"), + Text(text="Important points:"), ListItem(text="Roses are red"), ListItem(text="Violets are blue"), ] @@ -440,7 +440,7 @@ def test_partition_md_from_url_works_with_embedded_html(): def test_auto_partition_msg_from_filename(): assert partition(example_doc_path("fake-email.msg"), strategy=PartitionStrategy.HI_RES) == [ NarrativeText(text="This is a test email to use for unit tests."), - Title(text="Important points:"), + Text(text="Important points:"), ListItem(text="Roses are red"), ListItem(text="Violets are blue"), ] diff --git a/test_unstructured/partition/test_email.py b/test_unstructured/partition/test_email.py index eb34d499ca..5d5937d3a4 100644 --- a/test_unstructured/partition/test_email.py +++ b/test_unstructured/partition/test_email.py @@ -30,7 +30,7 @@ EXPECTED_OUTPUT = [ NarrativeText(text="This is a test email to use for unit tests."), - Title(text="Important points:"), + Text(text="Important points:"), ListItem(text="Roses are red"), ListItem(text="Violets are blue"), ] @@ -88,9 +88,9 @@ def test_extract_email_from_text_plain_matches_elements_extracted_from_text_html elements_from_text = partition_email(file_path, content_source="text/plain") elements_from_html = partition_email(file_path, content_source="text/html") - assert elements_from_text == EXPECTED_OUTPUT + assert all(e.text == eo.text for e, eo in zip(elements_from_text, EXPECTED_OUTPUT)) assert elements_from_html == EXPECTED_OUTPUT - assert elements_from_html == elements_from_text + assert all(eh.text == et.text for eh, et in zip(elements_from_html, elements_from_text)) def test_partition_email_round_trips_via_json(): @@ -354,14 +354,14 @@ def test_partition_email_can_process_attachments(): ) assert elements == [ - Title("Hello!"), + Text("Hello!"), NarrativeText("Here's the attachments!"), NarrativeText("It includes:"), ListItem("Lots of whitespace"), ListItem("Little to no content"), ListItem("and is a quick read"), Text("Best,"), - Title("Mallori"), + Text("Mallori"), NarrativeText("Hey this is a fake attachment!"), ] assert all(e.metadata.last_modified == "2022-12-23T18:08:48+00:00" for e in elements) diff --git a/test_unstructured/partition/test_msg.py b/test_unstructured/partition/test_msg.py index 43f49a0108..d1d66876ed 100644 --- a/test_unstructured/partition/test_msg.py +++ b/test_unstructured/partition/test_msg.py @@ -23,14 +23,13 @@ ListItem, NarrativeText, Text, - Title, ) from unstructured.partition.common import UnsupportedFileFormatError from unstructured.partition.msg import MsgPartitionerOptions, partition_msg EXPECTED_MSG_OUTPUT = [ NarrativeText(text="This is a test email to use for unit tests."), - Title(text="Important points:"), + Text(text="Important points:"), ListItem(text="Roses are red"), ListItem(text="Violets are blue"), ] @@ -138,9 +137,9 @@ def test_partition_msg_can_process_attachments(): assert [type(e).__name__ for e in elements][:10] == [ "NarrativeText", "Text", - "Title", - "Title", - "Title", + "Text", + "Text", + "Text", "Image", "Title", "Text", @@ -175,9 +174,9 @@ def test_partition_msg_silently_skips_attachments_it_cannot_partition(request: F # -- the email body is partitioned -- NarrativeText("Here are those documents."), Text("--"), - Title("Mallori Harrell"), - Title("Unstructured Technologies"), - Title("Data Scientist"), + Text("Mallori Harrell"), + Text("Unstructured Technologies"), + Text("Data Scientist"), # -- no elements appear for the attachment(s) -- ]