diff --git a/dvc/fs/base.py b/dvc/fs/base.py index 4ab79a3fdc..b9aaeeff25 100644 --- a/dvc/fs/base.py +++ b/dvc/fs/base.py @@ -43,7 +43,6 @@ class FileSystem: REQUIRES: ClassVar[Dict[str, str]] = {} _JOBS = 4 * cpu_count() - CHECKSUM_DIR_SUFFIX = ".dir" HASH_JOBS = max(1, min(4, cpu_count() // 2)) LIST_OBJECT_PAGE_SIZE = 1000 TRAVERSE_WEIGHT_MULTIPLIER = 5 @@ -218,12 +217,6 @@ def reflink(self, from_info, to_info): # pylint: enable=unused-argument - @classmethod - def is_dir_hash(cls, hash_): - if not hash_: - return False - return hash_.endswith(cls.CHECKSUM_DIR_SUFFIX) - def upload( self, from_info, diff --git a/dvc/objects/db/__init__.py b/dvc/objects/db/__init__.py index cabff21372..97ca9492ce 100644 --- a/dvc/objects/db/__init__.py +++ b/dvc/objects/db/__init__.py @@ -46,7 +46,6 @@ def get_index(odb) -> "ObjectDBIndexBase": hashlib.sha256( odb.fs.unstrip_protocol(odb.fs_path).encode("utf-8") ).hexdigest(), - odb.fs.CHECKSUM_DIR_SUFFIX, ) diff --git a/dvc/objects/db/base.py b/dvc/objects/db/base.py index 46c2693e94..485a412742 100644 --- a/dvc/objects/db/base.py +++ b/dvc/objects/db/base.py @@ -372,17 +372,22 @@ def gc(self, used, jobs=None, cache_odb=None, shallow=True): entry_obj.hash_info.value for _, entry_obj in tree ) + def _is_dir_hash(_hash): + from dvc.hash_info import HASH_DIR_SUFFIX + + return _hash.endswith(HASH_DIR_SUFFIX) + removed = False # hashes must be sorted to ensure we always remove .dir files first for hash_ in sorted( self.all(jobs, self.fs_path), - key=self.fs.is_dir_hash, + key=_is_dir_hash, reverse=True, ): if hash_ in used_hashes: continue fs_path = self.hash_to_path(hash_) - if self.fs.is_dir_hash(hash_): + if _is_dir_hash(hash_): # backward compatibility # pylint: disable=protected-access self._remove_unpacked_dir(hash_) diff --git a/dvc/objects/db/index.py b/dvc/objects/db/index.py index e928123520..4dc952a640 100644 --- a/dvc/objects/db/index.py +++ b/dvc/objects/db/index.py @@ -1,7 +1,7 @@ import logging import os from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Iterable, Optional, Set +from typing import TYPE_CHECKING, Iterable, Set from ..errors import ObjectDBError @@ -14,7 +14,9 @@ class ObjectDBIndexBase(ABC): @abstractmethod def __init__( - self, tmp_dir: "StrPath", name: str, dir_suffix: Optional[str] = None + self, + tmp_dir: "StrPath", + name: str, ): pass @@ -50,7 +52,9 @@ class ObjectDBIndexNoop(ObjectDBIndexBase): """No-op class for ODBs which are not indexed.""" def __init__( - self, tmp_dir: "StrPath", name: str, dir_suffix: Optional[str] = None + self, + tmp_dir: "StrPath", + name: str, ): # pylint: disable=super-init-not-called pass @@ -80,7 +84,9 @@ class ObjectDBIndex(ObjectDBIndexBase): INDEX_DIR = "index" def __init__( - self, tmp_dir: "StrPath", name: str, dir_suffix: Optional[str] = None + self, + tmp_dir: "StrPath", + name: str, ): # pylint: disable=super-init-not-called from diskcache import Index @@ -92,10 +98,6 @@ def __init__( self.fs = LocalFileSystem() self.index = Index(self.index_dir) - if not dir_suffix: - dir_suffix = self.fs.CHECKSUM_DIR_SUFFIX - self.dir_suffix = dir_suffix - def __iter__(self): return iter(self.index) @@ -106,9 +108,6 @@ def dir_hashes(self): """Iterate over .dir hashes stored in the index.""" yield from (hash_ for hash_, is_dir in self.index.items() if is_dir) - def is_dir_hash(self, hash_: str): - return hash_.endswith(self.dir_suffix) - def clear(self): """Clear this index (to force re-indexing later).""" from diskcache import Timeout diff --git a/tests/unit/remote/test_base.py b/tests/unit/remote/test_base.py index 8156645712..b61c5532a5 100644 --- a/tests/unit/remote/test_base.py +++ b/tests/unit/remote/test_base.py @@ -135,11 +135,3 @@ def test_list_paths(dvc): walk_mock.assert_called_with( posixpath.join(path, "00", "0"), prefix=True ) - - -@pytest.mark.parametrize( - "hash_, result", - [(None, False), ("", False), ("3456.dir", True), ("3456", False)], -) -def test_is_dir_hash(hash_, result): - assert FileSystem.is_dir_hash(hash_) == result diff --git a/tests/unit/remote/test_index.py b/tests/unit/remote/test_index.py index 8195daf3d0..272aff5fa6 100644 --- a/tests/unit/remote/test_index.py +++ b/tests/unit/remote/test_index.py @@ -16,11 +16,6 @@ def test_init(dvc, index): assert str(index.index_dir) == os.path.join(dvc.tmp_dir, "index", "foo") -def test_is_dir_hash(dvc, index): - assert index.is_dir_hash("foo.dir") - assert not index.is_dir_hash("foo") - - def test_roundtrip(dvc, index): expected_dir = {"1234.dir"} expected_file = {"5678"}