Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use 'schema' top-level element in the lockfile for version #5222

Merged
merged 1 commit into from
Jan 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from voluptuous import MultipleInvalid

from dvc.exceptions import DvcException
from dvc.parsing.versions import LOCKFILE_VERSION, META_KWD
from dvc.parsing.versions import LOCKFILE_VERSION, SCHEMA_KWD
from dvc.stage import serialize
from dvc.stage.exceptions import (
StageFileBadNameError,
Expand Down Expand Up @@ -289,14 +289,14 @@ def get_lockfile_schema(d):
return schema[version]


def migrate_lock_v1_to_v2(d, meta):
def migrate_lock_v1_to_v2(d, version_info):
stages = {k: v for k, v in d.items()}

for key in stages:
d.pop(key)

# forcing order, meta should always be at the top
d.update(meta)
d.update(version_info)
d["stages"] = stages


Expand All @@ -323,9 +323,9 @@ def load(self):
return data

@property
def meta(self):
def latest_version_info(self):
version = LOCKFILE_VERSION.V2.value # pylint:disable=no-member
return {META_KWD: {"version": version}}
return {SCHEMA_KWD: version}

def dump(self, stage, **kwargs):
stage_data = serialize.to_lockfile(stage)
Expand All @@ -336,10 +336,10 @@ def dump(self, stage, **kwargs):
logger.info(
"Migrating lock file '%s' from v1 to v2", self.relpath
)
migrate_lock_v1_to_v2(data, self.meta)
migrate_lock_v1_to_v2(data, self.latest_version_info)
else:
if not data:
data.update(self.meta)
data.update(self.latest_version_info)
# order is important, meta should always be at the top
logger.info("Generating lock file '%s'", self.relpath)

Expand Down
15 changes: 9 additions & 6 deletions dvc/parsing/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

from voluptuous import validators

VERSION_KWD = "version"
SCHEMA_KWD = "schema"
META_KWD = "meta"


def lockfile_version_schema(value):
expected = [LOCKFILE_VERSION.V2.value] # pylint: disable=no-member
msg = "invalid version {}, expected one of {}".format(value, expected)
msg = "invalid schema version {}, expected one of {}".format(
value, expected
)
return validators.Any(*expected, msg=msg)(value)


Expand All @@ -26,13 +28,14 @@ class LOCKFILE_VERSION(VersionEnum):
@classmethod
def from_dict(cls, data):
# 1) if it's empty or or is not a dict, use the latest one (V2).
# 2) use the `meta.version`
# 3) if it's not in any of the supported version, use the latest schema
# 4) if there's no version identifier, it's a V1
# 2) use the `schema` identifier if it exists and is a supported
# version
# 3) if it's not in any of the supported version, use the latest one
# 4) if there's no identifier, it's a V1
if not data or not isinstance(data, Mapping):
return cls(cls.V2)

version = data.get(META_KWD, {}).get(VERSION_KWD)
version = data.get(SCHEMA_KWD)
if version:
return cls(version if version in cls.all_versions() else cls.V2)
return cls(cls.V1)
4 changes: 2 additions & 2 deletions dvc/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dvc.hash_info import HashInfo
from dvc.output import CHECKSUMS_SCHEMA, BaseOutput
from dvc.parsing import DO_KWD, FOREACH_KWD, VARS_KWD
from dvc.parsing.versions import META_KWD, VERSION_KWD, lockfile_version_schema
from dvc.parsing.versions import SCHEMA_KWD, lockfile_version_schema
from dvc.stage.params import StageParams

STAGES = "stages"
Expand Down Expand Up @@ -40,7 +40,7 @@
LOCKFILE_V1_SCHEMA = LOCKFILE_STAGES_SCHEMA
LOCKFILE_V2_SCHEMA = {
STAGES: LOCKFILE_STAGES_SCHEMA,
META_KWD: {Required(VERSION_KWD): lockfile_version_schema},
Required(SCHEMA_KWD): lockfile_version_schema,
}

OUT_PSTAGE_DETAILED_SCHEMA = {
Expand Down
11 changes: 5 additions & 6 deletions tests/func/test_lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,23 +206,22 @@ def test_migrates_v1_lockfile_to_v2_during_dump(

assert "Migrating lock file 'dvc.lock' from v1 to v2" in caplog.messages
d = load_yaml(tmp_dir / "dvc.lock")
assert d == {"stages": v1_repo_lock, "meta": {"version": "2.0"}}
assert d == {"stages": v1_repo_lock, "schema": "2.0"}


@pytest.mark.parametrize(
"version_info",
[{"version": "1.1"}, {"version": "2.1"}, {"version": "3.0"}],
"version_info", [{"schema": "1.1"}, {"schema": "2.1"}, {"schema": "3.0"}],
)
def test_lockfile_invalid_versions(tmp_dir, dvc, version_info):
lockdata = {"meta": version_info, "stages": {"foo": {"cmd": "echo foo"}}}
lockdata = {**version_info, "stages": {"foo": {"cmd": "echo foo"}}}
dump_yaml("dvc.lock", lockdata)
with pytest.raises(LockfileCorruptedError) as exc_info:
Lockfile(dvc, tmp_dir / "dvc.lock").load()

assert str(exc_info.value) == "Lockfile 'dvc.lock' is corrupted."
assert (
str(exc_info.value.__cause__) == "'dvc.lock' format error: "
f"invalid version {version_info['version']}, "
f"invalid schema version {version_info['schema']}, "
"expected one of ['2.0'] for dictionary value @ "
"data['meta']['version']"
"data['schema']"
)
3 changes: 1 addition & 2 deletions tests/func/test_run_multistage.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,7 @@ def test_run_external_outputs(

assert (tmp_dir / "dvc.yaml").read_text() == dvc_yaml
assert (tmp_dir / "dvc.lock").read_text() == (
"meta:\n"
" version: '2.0'\n"
"schema: '2.0'\n"
"stages:\n"
" mystage:\n"
" cmd: mycmd\n"
Expand Down
8 changes: 4 additions & 4 deletions tests/unit/test_lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ def test_stage_dump_no_outs_deps(tmp_dir, dvc):
lockfile = Lockfile(dvc, "path.lock")
lockfile.dump(stage)
assert lockfile.load() == {
"schema": "2.0",
"stages": {"s1": {"cmd": "command"}},
"meta": {"version": "2.0"},
}


Expand All @@ -22,8 +22,8 @@ def test_stage_dump_when_already_exists(tmp_dir, dvc):
lockfile = Lockfile(dvc, "path.lock")
lockfile.dump(stage)
assert lockfile.load() == {
"schema": "2.0",
"stages": {**data, "s2": {"cmd": "command2"}},
"meta": {"version": "2.0"},
}


Expand All @@ -40,8 +40,8 @@ def test_stage_dump_with_deps_and_outs(tmp_dir, dvc):
stage = PipelineStage(name="s2", repo=dvc, path="path", cmd="command2")
lockfile.dump(stage)
assert lockfile.load() == {
"schema": "2.0",
"stages": {**data, "s2": {"cmd": "command2"}},
"meta": {"version": "2.0"},
}


Expand All @@ -52,8 +52,8 @@ def test_stage_overwrites_if_already_exists(tmp_dir, dvc):
stage = PipelineStage(name="s2", repo=dvc, path="path", cmd="command3")
lockfile.dump(stage)
assert lockfile.load() == {
"schema": "2.0",
"stages": {"s2": {"cmd": "command3"}},
"meta": {"version": "2.0"},
}


Expand Down