Skip to content

Commit

Permalink
dvc.objects.cache: set type in a more compatible way
Browse files Browse the repository at this point in the history
It turns out that diskcache.Disk saves passed arguments to the database
and uses that all the time. So this means that we are not forward
compatible as it breaks moving from newer version of DVC to an older
one. We don't promise this, but this breaks our benchmarks which
are probably not isolated enough.

Regarding the use of cache, it is a bit iffy to extend it this way.
We only need this type for error messages. We could consider removing
this, but since the error message is documented and it's not too hard
to extend this :), I decided to preserve the message.
  • Loading branch information
skshetry committed May 13, 2022
1 parent 3f5757a commit dc1d3e8
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 7 deletions.
4 changes: 1 addition & 3 deletions dvc/data/db/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,7 @@ def __init__(
self.index_dir = os.path.join(tmp_dir, self.INDEX_DIR, name)
makedirs(self.index_dir, exist_ok=True)
self.fs = LocalFileSystem()
cache = Cache(
self.index_dir, eviction_policy="none", disk_type="index"
)
cache = Cache(self.index_dir, eviction_policy="none", type="index")
self.index = Index.fromcache(cache)

def __iter__(self):
Expand Down
10 changes: 7 additions & 3 deletions dvc/objects/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,8 @@ def wrapped(self, *args, **kwargs):
class Disk(disk):
"""Reraise pickle-related errors as DiskError."""

def __init__(self, *args, type=None, **kwargs):
super().__init__(*args, **kwargs)
self._type = type or os.path.basename(self._directory)
# we need type to differentiate cache for better error messages
_type: str

put = translate_pickle_error(disk.put)
get = translate_pickle_error(disk.get)
Expand All @@ -53,9 +52,14 @@ def __init__(
directory: str = None,
timeout: int = 60,
disk: disk = Disk, # pylint: disable=redefined-outer-name
type: str = None,
**settings: Any,
) -> None:
settings.setdefault("disk_pickle_protocol", 4)
super().__init__(
directory=directory, timeout=timeout, disk=disk, **settings
)
self.disk._type = self._type = type or os.path.basename(self.directory)

def __getstate__(self):
return (*super().__getstate__(), self._type)
2 changes: 1 addition & 1 deletion tests/unit/objects/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_pickle_protocol_error(tmp_dir, disk_type):
cache = Cache(
directory,
disk_pickle_protocol=pickle.HIGHEST_PROTOCOL + 1,
disk_type=disk_type,
type=disk_type,
)
with pytest.raises(DiskError) as exc, cache as cache:
set_value(cache, "key", ("value1", "value2"))
Expand Down

0 comments on commit dc1d3e8

Please sign in to comment.