Skip to content

Commit

Permalink
fs: get rid of CHECKSUM_DIR_SUFFIX and is_dir_hash
Browse files Browse the repository at this point in the history
These are leftovers from old remotes, that clearly have nothing
to do with filesystems.
  • Loading branch information
efiop committed Dec 10, 2021
1 parent 82c5cae commit 81eda7e
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 34 deletions.
7 changes: 0 additions & 7 deletions dvc/fs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ class FileSystem:
REQUIRES: ClassVar[Dict[str, str]] = {}
_JOBS = 4 * cpu_count()

CHECKSUM_DIR_SUFFIX = ".dir"
HASH_JOBS = max(1, min(4, cpu_count() // 2))
LIST_OBJECT_PAGE_SIZE = 1000
TRAVERSE_WEIGHT_MULTIPLIER = 5
Expand Down Expand Up @@ -218,12 +217,6 @@ def reflink(self, from_info, to_info):

# pylint: enable=unused-argument

@classmethod
def is_dir_hash(cls, hash_):
if not hash_:
return False
return hash_.endswith(cls.CHECKSUM_DIR_SUFFIX)

def upload(
self,
from_info,
Expand Down
1 change: 0 additions & 1 deletion dvc/objects/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def get_index(odb) -> "ObjectDBIndexBase":
hashlib.sha256(
odb.fs.unstrip_protocol(odb.fs_path).encode("utf-8")
).hexdigest(),
odb.fs.CHECKSUM_DIR_SUFFIX,
)


Expand Down
9 changes: 7 additions & 2 deletions dvc/objects/db/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,17 +372,22 @@ def gc(self, used, jobs=None, cache_odb=None, shallow=True):
entry_obj.hash_info.value for _, entry_obj in tree
)

def _is_dir_hash(_hash):
from dvc.hash_info import HASH_DIR_SUFFIX

return _hash.endswith(HASH_DIR_SUFFIX)

removed = False
# hashes must be sorted to ensure we always remove .dir files first
for hash_ in sorted(
self.all(jobs, self.fs_path),
key=self.fs.is_dir_hash,
key=_is_dir_hash,
reverse=True,
):
if hash_ in used_hashes:
continue
fs_path = self.hash_to_path(hash_)
if self.fs.is_dir_hash(hash_):
if _is_dir_hash(hash_):
# backward compatibility
# pylint: disable=protected-access
self._remove_unpacked_dir(hash_)
Expand Down
21 changes: 10 additions & 11 deletions dvc/objects/db/index.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING, Iterable, Optional, Set
from typing import TYPE_CHECKING, Iterable, Set

from ..errors import ObjectDBError

Expand All @@ -14,7 +14,9 @@
class ObjectDBIndexBase(ABC):
@abstractmethod
def __init__(
self, tmp_dir: "StrPath", name: str, dir_suffix: Optional[str] = None
self,
tmp_dir: "StrPath",
name: str,
):
pass

Expand Down Expand Up @@ -50,7 +52,9 @@ class ObjectDBIndexNoop(ObjectDBIndexBase):
"""No-op class for ODBs which are not indexed."""

def __init__(
self, tmp_dir: "StrPath", name: str, dir_suffix: Optional[str] = None
self,
tmp_dir: "StrPath",
name: str,
): # pylint: disable=super-init-not-called
pass

Expand Down Expand Up @@ -80,7 +84,9 @@ class ObjectDBIndex(ObjectDBIndexBase):
INDEX_DIR = "index"

def __init__(
self, tmp_dir: "StrPath", name: str, dir_suffix: Optional[str] = None
self,
tmp_dir: "StrPath",
name: str,
): # pylint: disable=super-init-not-called
from diskcache import Index

Expand All @@ -92,10 +98,6 @@ def __init__(
self.fs = LocalFileSystem()
self.index = Index(self.index_dir)

if not dir_suffix:
dir_suffix = self.fs.CHECKSUM_DIR_SUFFIX
self.dir_suffix = dir_suffix

def __iter__(self):
return iter(self.index)

Expand All @@ -106,9 +108,6 @@ def dir_hashes(self):
"""Iterate over .dir hashes stored in the index."""
yield from (hash_ for hash_, is_dir in self.index.items() if is_dir)

def is_dir_hash(self, hash_: str):
return hash_.endswith(self.dir_suffix)

def clear(self):
"""Clear this index (to force re-indexing later)."""
from diskcache import Timeout
Expand Down
8 changes: 0 additions & 8 deletions tests/unit/remote/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,3 @@ def test_list_paths(dvc):
walk_mock.assert_called_with(
posixpath.join(path, "00", "0"), prefix=True
)


@pytest.mark.parametrize(
"hash_, result",
[(None, False), ("", False), ("3456.dir", True), ("3456", False)],
)
def test_is_dir_hash(hash_, result):
assert FileSystem.is_dir_hash(hash_) == result
5 changes: 0 additions & 5 deletions tests/unit/remote/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,6 @@ def test_init(dvc, index):
assert str(index.index_dir) == os.path.join(dvc.tmp_dir, "index", "foo")


def test_is_dir_hash(dvc, index):
assert index.is_dir_hash("foo.dir")
assert not index.is_dir_hash("foo")


def test_roundtrip(dvc, index):
expected_dir = {"1234.dir"}
expected_file = {"5678"}
Expand Down

0 comments on commit 81eda7e

Please sign in to comment.