Skip to content

Commit

Permalink
remove unused code
Browse files Browse the repository at this point in the history
  • Loading branch information
micmarty-deepsense committed Mar 28, 2024
1 parent 7efe3a4 commit 4c393f4
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 33 deletions.
33 changes: 3 additions & 30 deletions unstructured/documents/elements.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import re
import uuid
from types import MappingProxyType
from typing import Any, Callable, FrozenSet, Optional, Sequence, cast
from typing import Any, Callable, FrozenSet, List, Optional, Sequence, cast

from typing_extensions import ParamSpec, TypeAlias, TypedDict

Expand All @@ -32,12 +32,6 @@ class NoID(abc.ABC):
"""Class to indicate that an element does not have an ID."""


class HashValue(str):
"""Class to indicate that an element has a hash value assigned to its ID."""

pass


class UUID(abc.ABC):
"""Class to indicate that an element should have a UUID."""

Expand Down Expand Up @@ -510,8 +504,6 @@ def field_consolidation_strategies(cls) -> dict[str, ConsolidationStrategy]:

_P = ParamSpec("_P")

from typing import List


def calculate_hash(text: str, page_number: int, index_in_sequence: int) -> str:
"""
Expand Down Expand Up @@ -817,35 +809,16 @@ def __init__(
self.text: str = text
self.embeddings: Optional[list[float]] = embeddings

if isinstance(element_id, NoID):
self.id = self._calculate_hash()
elif isinstance(element_id, UUID):
self.id = uuid.uuid4()
elif isinstance(element_id, str):
self.id = element_id
else:
raise ValueError("ID must be a string, UUID, or NoID")
element_id = str(uuid.uuid4())

super().__init__(
element_id=self.id,
element_id=element_id,
metadata=metadata,
coordinates=coordinates,
coordinate_system=coordinate_system,
detection_origin=detection_origin,
)

def _calculate_hash(self, index_in_sequence: int = 0) -> HashValue:
"""Calculate the hash depending on element's text and index in sequence.
Args:
index_in_sequence: Index of the element in the sequence of all elements.
Returns:
HashValue - 128-bit hash value of the element.
"""
data = f"{self.text}"
return HashValue(hashlib.sha256(data.encode()).hexdigest()[:32])

def __eq__(self, other: object):
if not isinstance(other, Text):
return False
Expand Down
1 change: 0 additions & 1 deletion unstructured/file_utils/filetype.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,6 @@ def wrapper(*args: _P.args, **kwargs: _P.kwargs) -> List[Element]:
metadata_kwargs = {
kwarg: params.get(kwarg) for kwarg in ("filename", "url", "text_as_html")
}

# NOTE (yao): do not use cast here as cast(None) still is None
if not str(kwargs.get("model_name", "")).startswith("chipper"):
# NOTE(alan): Skip hierarchy if using chipper, as it should take care of that
Expand Down
2 changes: 0 additions & 2 deletions unstructured/partition/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
Element,
ElementMetadata,
ElementType,
HashValue,
ListItem,
NoID,
PageBreak,
Text,
Title,
Expand Down

0 comments on commit 4c393f4

Please sign in to comment.