output: Don't serialize hash_info or meta in cloud versioning.
Closes #8357
daavoo committed Jan 20, 2023
1 parent 9af6fca · commit 291418f
Showing 5 changed files with 160 additions and 22 deletions.
37 changes: 21 additions & 16 deletions dvc/output.py
@@ -368,7 +368,11 @@ def __init__(
             name=self.hash_name,
             value=getattr(self.meta, self.hash_name, None),
         )
-        if self.meta.nfiles or self.hash_info and self.hash_info.isdir:
+        if self.files:
+            tree = Tree.from_list(self.files, hash_name=self.hash_name)
+            tree.digest()
+            self.hash_info = tree.hash_info
+        elif self.meta.nfiles or self.hash_info and self.hash_info.isdir:
             self.meta.isdir = True
             if not self.hash_info and self.hash_name != "md5":
                 md5 = getattr(self.meta, "md5", None)
@@ -747,7 +751,22 @@ def _commit_granular_dir(self, filter_info) -> Optional["HashFile"]:
     def dumpd(self, **kwargs):  # noqa: C901
         meta = self.meta.to_dict()
         meta.pop("isdir", None)
-        ret: Dict[str, Any] = {**self.hash_info.to_dict(), **meta}
+        ret: Dict[str, Any] = {}
+        if (
+            (not self.IS_DEPENDENCY or self.stage.is_import)
+            and self.hash_info.isdir
+            and (kwargs.get("with_files") or self.files is not None)
+        ):
+            obj: Optional["HashFile"]
+            if self.obj:
+                obj = self.obj
+            else:
+                obj = self.get_obj()
+            if obj:
+                obj = cast(Tree, obj)
+                ret[self.PARAM_FILES] = obj.as_list(with_meta=True)
+        else:
+            ret = {**self.hash_info.to_dict(), **meta}

         if self.is_in_repo:
             path = self.fs.path.as_posix(
@@ -788,20 +807,6 @@ def dumpd(self, **kwargs):  # noqa: C901
         if not self.can_push:
             ret[self.PARAM_PUSH] = self.can_push

-        if (
-            (not self.IS_DEPENDENCY or self.stage.is_import)
-            and self.hash_info.isdir
-            and (kwargs.get("with_files") or self.files is not None)
-        ):
-            obj: Optional["HashFile"]
-            if self.obj:
-                obj = self.obj
-            else:
-                obj = self.get_obj()
-            if obj:
-                obj = cast(Tree, obj)
-                ret[self.PARAM_FILES] = obj.as_list(with_meta=True)
-
         return ret

     def verify_metric(self):
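
For readers skimming the diff, the new dumpd() branching reduces to the following shape. This is a minimal sketch, not the real Output class: plain dicts stand in for HashInfo/Meta, and the real method resolves the file listing through self.obj / self.get_obj().

# Minimal sketch of the dumpd() branching above; dumpd_sketch and its
# dict arguments are hypothetical stand-ins for the real Output internals.
from typing import Any, Dict, List, Optional


def dumpd_sketch(
    hash_info: Dict[str, Any],
    meta: Dict[str, Any],
    files: Optional[List[dict]],
) -> Dict[str, Any]:
    if files is not None:
        # Cloud-versioned directory: serialize only the per-file entries;
        # the aggregate hash/meta would duplicate them (see #8357).
        return {"files": files}
    # Everything else keeps the flat hash/meta fields as before.
    return {**hash_info, **meta}


print(dumpd_sketch(
    {"md5": "77e8000f532886eef8ee1feba82e9bad.dir"},
    {"size": 6, "nfiles": 2},
    [{"size": 3, "md5": "acbd18db4cc2f85cedef654fccc4a4d8", "relpath": "foo"}],
))
# {'files': [{'size': 3, 'md5': 'acbd18db4cc2f85cedef654fccc4a4d8', 'relpath': 'foo'}]}
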
10 changes: 4 additions & 6 deletions dvc/stage/serialize.py
@@ -158,18 +158,16 @@ def to_single_stage_lockfile(stage: "Stage", **kwargs) -> dict:
     def _dumpd(item):
         meta_d = item.meta.to_dict()
         meta_d.pop("isdir", None)
-        ret = [
-            (item.PARAM_PATH, item.def_path),
-            *item.hash_info.to_dict().items(),
-            *meta_d.items(),
-        ]

         if item.hash_info.isdir and kwargs.get("with_files"):
             if item.obj:
                 obj = item.obj
             else:
                 obj = item.get_obj()
-            ret.append((item.PARAM_FILES, obj.as_list(with_meta=True)))
+            ret = [((item.PARAM_FILES, obj.as_list(with_meta=True)))]
+        else:
+            ret = [*item.hash_info.to_dict().items(), *meta_d.items()]
+        ret.insert(0, (item.PARAM_PATH, item.def_path))

         return OrderedDict(ret)

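The two lockfile-entry shapes _dumpd now produces, sketched below. The lock_entry helper is hypothetical; the real code works on Output objects and the item.PARAM_* constants, not plain dicts.

# Sketch of the two lockfile-entry shapes produced by _dumpd above
# (lock_entry is a hypothetical helper, not DVC's API).
from collections import OrderedDict


def lock_entry(path, hash_info, meta, files=None, with_files=False):
    if with_files and files is not None:
        ret = [("files", files)]  # cloud-versioned dir: per-file entries only
    else:
        ret = [*hash_info.items(), *meta.items()]  # classic flat hash + meta
    ret.insert(0, ("path", path))  # path always leads the entry
    return OrderedDict(ret)


print(lock_entry("dir", {"md5": "md-five.dir"}, {"nfiles": 2},
                 files=[], with_files=True))
# OrderedDict([('path', 'dir'), ('files', [])])
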
58 changes: 58 additions & 0 deletions tests/func/test_add.py
@@ -1142,3 +1142,61 @@ def test_add_with_annotations(M, tmp_dir, dvc):
     (stage,) = dvc.add("foo", type="t2")
     assert stage.outs[0].annot == Annotation(**annot)
     assert (tmp_dir / "foo.dvc").parse() == M.dict(outs=[M.dict(**annot)])
+
+
+def test_add_updates_to_cloud_versioning_dir(tmp_dir, dvc):
+    data_dvc = tmp_dir / "data.dvc"
+    data_dvc.dump(
+        {
+            "outs": [
+                {
+                    "path": "data",
+                    "files": [
+                        {
+                            "size": 3,
+                            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+                            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "relpath": "bar",
+                        },
+                        {
+                            "size": 3,
+                            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+                            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "relpath": "foo",
+                        },
+                    ],
+                }
+            ]
+        }
+    )
+
+    data = tmp_dir / "data"
+    data.mkdir()
+    (data / "foo").write_text("foo")
+    (data / "bar").write_text("bar2")
+
+    dvc.add("data")
+
+    assert (tmp_dir / "data.dvc").parse() == {
+        "outs": [
+            {
+                "path": "data",
+                "files": [
+                    {
+                        "size": 4,
+                        "md5": "224e2539f52203eb33728acd228b4432",
+                        "relpath": "bar",
+                    },
+                    {
+                        "size": 3,
+                        "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+                        "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+                        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+                        "relpath": "foo",
+                    },
+                ],
+            }
+        ]
+    }
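
The assertion encodes the intended update semantics: the file whose content changed ("bar") drops its stale version_id/etag and keeps only the fresh local md5 and size until the next push, while the unchanged file ("foo") retains its cloud metadata. The helper below is a hypothetical reading of that rule, derived from the expected .dvc contents, not DVC's actual implementation.

# Hypothetical helper expressing the rule the test asserts (update_entry is
# not part of DVC's codebase).
def update_entry(old: dict, new_md5: str, new_size: int) -> dict:
    if old.get("md5") == new_md5:
        return old  # content unchanged: keep version_id/etag as-is
    # content changed: cloud metadata is stale; keep only local hash info
    return {"size": new_size, "md5": new_md5, "relpath": old["relpath"]}
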
52 changes: 52 additions & 0 deletions tests/unit/output/test_output.py
@@ -121,3 +121,55 @@ def test_remote_missing_dependency_on_dir_pull(tmp_dir, scm, dvc, mocker):
     )
     with pytest.raises(RemoteMissingDepsError):
         dvc.pull()
+
+
+def test_hash_info_cloud_versioning_dir(mocker):
+    stage = mocker.MagicMock()
+    stage.repo.fs.version_aware = False
+    stage.repo.fs.PARAM_CHECKSUM = "etag"
+    files = [
+        {
+            "size": 3,
+            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "bar",
+        },
+        {
+            "size": 3,
+            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "foo",
+        },
+    ]
+    out = Output(stage, "path", files=files)
+    # hash_info constructed from files
+    assert out.hash_info.name == "md5"
+    assert out.hash_info.value == "77e8000f532886eef8ee1feba82e9bad.dir"
+
+
+def test_dumpd_cloud_versioning_dir(mocker):
+    stage = mocker.MagicMock()
+    stage.repo.fs.version_aware = False
+    stage.repo.fs.PARAM_CHECKSUM = "md5"
+    files = [
+        {
+            "size": 3,
+            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "bar",
+        },
+        {
+            "size": 3,
+            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "foo",
+        },
+    ]
+    out = Output(stage, "path", files=files)
+
+    dumpd = out.dumpd()
+    assert dumpd == {"path": "path", "files": files}
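
The asserted value 77e8000f532886eef8ee1feba82e9bad.dir comes from the Tree.from_list(...).digest() call added in the __init__ hunk above: the directory hash is a checksum over the canonical file listing. The sketch below is illustrative only; the exact serialization is an assumption, and dvc-data's Tree is authoritative and may differ byte-for-byte.

# Illustrative only: one way a ".dir" digest can be derived from a file
# listing (dir_digest is hypothetical; the real logic is Tree.digest()).
import hashlib
import json


def dir_digest(files: list) -> str:
    entries = sorted(
        ({"md5": f["md5"], "relpath": f["relpath"]} for f in files),
        key=lambda e: e["relpath"],
    )
    payload = json.dumps(entries, sort_keys=True).encode()
    return hashlib.md5(payload).hexdigest() + ".dir"
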
25 changes: 25 additions & 0 deletions tests/unit/stage/test_serialize_pipeline_lock.py
@@ -255,3 +255,28 @@ def test_to_lockfile(dvc):
            ]
        )
    }
+
+
+def test_to_single_stage_lockfile_cloud_versioning_dir(dvc):
+    stage = create_stage(PipelineStage, dvc, outs=["dir"], **kwargs)
+    stage.outs[0].hash_info = HashInfo("md5", "md-five.dir")
+    files = [
+        {
+            "size": 3,
+            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "bar",
+        },
+        {
+            "size": 3,
+            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "foo",
+        },
+    ]
+    stage.outs[0].files = files
+    e = _to_single_stage_lockfile(stage, with_files=True)
+    assert Schema(e)
+    assert e["outs"][0] == {"path": "dir", "files": files}
