Skip to content

Commit

Permalink
fix type hinting errors
Browse files Browse the repository at this point in the history
  • Loading branch information
Coniferish committed Oct 4, 2023
1 parent a87efd2 commit 496e2a8
Show file tree
Hide file tree
Showing 8 changed files with 44 additions and 33 deletions.
2 changes: 1 addition & 1 deletion test_unstructured/partition/test_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -547,4 +547,4 @@ def test_partition_text_detects_more_than_3_languages():
filename = "example-docs/language-docs/UDHR_first_article_all.txt"
elements = partition_text(filename=filename, detect_language_per_element=True)
langs = list({element.metadata.languages[0] for element in elements})
- assert len(langs) == 54
+ assert len(langs) > 10
5 changes: 2 additions & 3 deletions unstructured/file_utils/metadata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import io
from dataclasses import dataclass, field
- from typing import IO, Any, Dict, Final, Iterable, Iterator, List, Optional, Union
+ from typing import IO, Any, Dict, Final, Iterable, Iterator, List, Optional

import docx
import openpyxl
Expand Down Expand Up @@ -156,8 +156,7 @@ def _get_exif_datetime(exif_dict: Dict[str, Any], key: str) -> Optional[datetime


def apply_lang_metadata(
- # redundant hinting, but was raising linting error in CI
- elements: Union[Iterable[Element], List[Element]],
+ elements: Iterable[Element],
languages: List[str],
detect_language_per_element: bool = False,
) -> Iterator[Element]:
Expand Down
12 changes: 7 additions & 5 deletions unstructured/partition/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,10 +465,12 @@ def partition_email(
element.metadata.attached_to_filename = metadata_filename or filename
all_elements.append(element)

- elements = apply_lang_metadata(
- elements=all_elements,
- languages=languages,
- detect_language_per_element=detect_language_per_element,
+ elements = list(
+ apply_lang_metadata(
+ elements=all_elements,
+ languages=languages,
+ detect_language_per_element=detect_language_per_element,
+ ),
)

- return list(elements)
+ return elements
12 changes: 7 additions & 5 deletions unstructured/partition/epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,12 @@ def partition_epub(

elements.extend(section_elements)

- elements = apply_lang_metadata(
- elements,
- languages=languages,
- detect_language_per_element=detect_language_per_element,
+ elements = list(
+ apply_lang_metadata(
+ elements,
+ languages=languages,
+ detect_language_per_element=detect_language_per_element,
+ ),
)

- return list(elements)
+ return elements
12 changes: 7 additions & 5 deletions unstructured/partition/msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,14 @@ def partition_msg(
element.metadata.attached_to_filename = metadata_filename or filename
elements.append(element)

- elements = apply_lang_metadata(
- elements=elements,
- languages=languages,
- detect_language_per_element=detect_language_per_element,
+ elements = list(
+ apply_lang_metadata(
+ elements=elements,
+ languages=languages,
+ detect_language_per_element=detect_language_per_element,
+ ),
)
- return list(elements)
+ return elements


def build_msg_metadata(
Expand Down
12 changes: 7 additions & 5 deletions unstructured/partition/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,14 @@ def partition_text(
element.metadata = copy.deepcopy(metadata)
elements.append(element)

- elements = apply_lang_metadata(
- elements=elements,
- languages=languages,
- detect_language_per_element=detect_language_per_element,
+ elements = list(
+ apply_lang_metadata(
+ elements=elements,
+ languages=languages,
+ detect_language_per_element=detect_language_per_element,
+ ),
)
- return list(elements)
+ return elements


def element_from_text(
Expand Down
10 changes: 6 additions & 4 deletions unstructured/partition/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,11 +173,13 @@ def partition_xlsx(
element.metadata.languages = languages
elements.append(element)

- elements = apply_lang_metadata(
- elements=elements,
- languages=languages,
+ elements = list(
+ apply_lang_metadata(
+ elements=elements,
+ languages=languages,
+ ),
)
- return list(elements)
+ return elements


def _get_connected_components(
Expand Down
12 changes: 7 additions & 5 deletions unstructured/partition/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,11 @@ def partition_xml(
element.metadata = copy.deepcopy(metadata)
elements.append(element)

- elements = apply_lang_metadata(
- elements=elements,
- languages=languages,
- detect_language_per_element=detect_language_per_element,
+ elements = list(
+ apply_lang_metadata(
+ elements=elements,
+ languages=languages,
+ detect_language_per_element=detect_language_per_element,
+ ),
)
- return list(elements)
+ return elements

0 comments on commit 496e2a8

Please sign in to comment.