diff --git a/test_unstructured/partition/csv/test_csv.py b/test_unstructured/partition/csv/test_csv.py index 217a162e67..f1d5ecaf4b 100644 --- a/test_unstructured/partition/csv/test_csv.py +++ b/test_unstructured/partition/csv/test_csv.py @@ -206,9 +206,8 @@ def test_add_chunking_strategy_to_partition_csv_non_default(): assert chunk_elements != elements assert chunk_elements == chunks - + def test_partition_csv_element_metadata_has_languages(): filename = "example-docs/stanley-cups.csv" elements = partition_csv(filename=filename, strategy="fast") assert elements[0].metadata.languages == ["eng"] - \ No newline at end of file diff --git a/test_unstructured/partition/test_html_partition.py b/test_unstructured/partition/test_html_partition.py index b8cbeaf121..06213ecf5f 100644 --- a/test_unstructured/partition/test_html_partition.py +++ b/test_unstructured/partition/test_html_partition.py @@ -668,8 +668,8 @@ def test_html_heading_title_detection(): ListItem("- bulleted item"), ] - + def test_partition_html_element_metadata_has_languages(): filename = "example-docs/example-10k.html" elements = partition_html(filename=filename) - assert elements[0].metadata.languages == ["eng"] \ No newline at end of file + assert elements[0].metadata.languages == ["eng"] diff --git a/unstructured/file_utils/metadata.py b/unstructured/file_utils/metadata.py index d73d7bdd17..7980a41adc 100644 --- a/unstructured/file_utils/metadata.py +++ b/unstructured/file_utils/metadata.py @@ -188,8 +188,9 @@ def apply_lang_metadata( yield e else: for e in elements: - if not hasattr(e, "text"): + if hasattr(e, "text"): + e.metadata.languages = detect_languages(e.text) + yield e + else: yield e continue - e.metadata.languages = detect_languages(e.text) - yield e