Skip to content

Commit

Permalink
fix: address PR comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
scanny committed Oct 18, 2023
1 parent cc4fb9e commit 6860acc
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
19 changes: 19 additions & 0 deletions test_unstructured/documents/test_elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
Element,
ElementMetadata,
NoID,
RegexMetadata,
Text,
)

Expand Down Expand Up @@ -189,6 +190,24 @@ def test_element_to_dict():
assert element.to_dict() == expected


def test_regex_metadata_round_trips_through_JSON():
    """`regex_metadata` survives a dict -> JSON -> dict -> JSON cycle unchanged.

    Builds an `ElementMetadata` carrying nested `RegexMetadata` entries, serializes it to
    JSON, rebuilds the metadata from that JSON, serializes again, and requires the two JSON
    strings to be identical.
    """
    # NOTE(review): the last entry's offsets span 18 characters while its text is 17
    # characters long; the offsets are not checked against the text by this test.
    mail_stop_matches = [RegexMetadata(text="MS-107", start=18, end=24)]
    version_matches = [
        RegexMetadata(text="current=v1.7.2", start=7, end=21),
        RegexMetadata(text="supersedes=v1.7.2", start=22, end=40),
    ]
    original = ElementMetadata(
        regex_metadata={"mail-stop": mail_stop_matches, "version": version_matches}
    )

    # -- first pass: metadata object -> dict -> JSON text --
    first_pass_json = json.dumps(original.to_dict())

    # -- second pass: JSON text -> dict -> metadata object -> dict -> JSON text --
    restored = ElementMetadata.from_dict(json.loads(first_pass_json))
    second_pass_json = json.dumps(restored.to_dict())

    assert second_pass_json == first_pass_json


def test_metadata_from_dict_extra_fields():
"""
Assert that the metadata classes ignore nonexistent fields.
Expand Down
3 changes: 1 addition & 2 deletions unstructured/chunking/title.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Implementation of chunking.
"""Implementation of chunking by title.
Main entry point is the `@add_chunking_strategy()` decorator.
"""
Expand Down Expand Up @@ -133,7 +133,6 @@ def chunk_by_title(
value = cast(List[Any], value)
# -- get existing (list) value from chunk_metadata --
_value = getattr(metadata, attr, []) or []
# TODO: this mutates the original, work on a copy instead.
_value.extend(item for item in value if item not in _value)
setattr(metadata, attr, _value)

Expand Down

0 comments on commit 6860acc

Please sign in to comment.