Skip to content
/ dvc Public
forked from iterative/dvc

Commit

Permalink
lockfile: drop 1.0 lockfile support (iterative#9476)
Browse files Browse the repository at this point in the history
  • Loading branch information
skshetry authored May 18, 2023
1 parent 90aaf5a commit 3959c51
Show file tree
Hide file tree
Showing 6 changed files with 12 additions and 159 deletions.
47 changes: 4 additions & 43 deletions dvc/dvcfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
)

from dvc.exceptions import DvcException
from dvc.parsing.versions import LOCKFILE_VERSION, SCHEMA_KWD
from dvc.stage import serialize
from dvc.stage.exceptions import (
StageFileBadNameError,
Expand Down Expand Up @@ -358,36 +357,8 @@ def merge(self, ancestor, other, allowed=None):
raise NotImplementedError


def get_lockfile_schema(d):
    """Return the compiled lockfile schema that matches the version of ``d``.

    The version is whatever ``LOCKFILE_VERSION.from_dict(d)`` reports; it is
    then looked up in a fixed version -> compiled-schema table covering V1
    and V2. A version outside that table results in a ``KeyError`` from the
    lookup.
    """
    # Imported lazily inside the function, as in the original
    # (presumably to avoid an import cycle with dvc.schema -- TODO confirm).
    from dvc.schema import COMPILED_LOCKFILE_V1_SCHEMA, COMPILED_LOCKFILE_V2_SCHEMA

    detected_version = LOCKFILE_VERSION.from_dict(d)
    compiled_by_version = {
        LOCKFILE_VERSION.V1: COMPILED_LOCKFILE_V1_SCHEMA,
        LOCKFILE_VERSION.V2: COMPILED_LOCKFILE_V2_SCHEMA,
    }
    return compiled_by_version[detected_version]


def migrate_lock_v1_to_v2(d, version_info):
    """Restructure a v1 lockfile mapping into the v2 layout, in place.

    Every existing top-level entry of ``d`` is moved under a new ``"stages"``
    key, and the entries of ``version_info`` are inserted ahead of it.

    Args:
        d: mutable mapping holding the v1 lockfile contents; modified in place.
        version_info: mapping of metadata entries to place at the top of ``d``.

    Returns:
        None. The result is observable only through the mutation of ``d``.
    """
    # Snapshot the current top-level entries; these become the "stages" value.
    legacy_stages = dict(d)

    # Walk the snapshot (not ``d`` itself) so removing keys is safe.
    for stage_name in legacy_stages:
        d.pop(stage_name)

    # Insertion order matters here: the metadata must come first, so it is
    # written before the stages are put back.
    d.update(version_info)
    d["stages"] = legacy_stages


def lockfile_schema(data: _T) -> _T:
    """Validate ``data`` with the schema chosen for it by ``get_lockfile_schema``.

    Returns whatever the selected schema callable returns for ``data``.
    """
    return get_lockfile_schema(data)(data)


class Lockfile(FileMixin):
SCHEMA = staticmethod(lockfile_schema) # type: ignore[assignment]
from dvc.schema import COMPILED_LOCKFILE_SCHEMA as SCHEMA

def _verify_filename(self):
pass # lockfile path is hardcoded, so no need to verify here
Expand All @@ -401,21 +372,12 @@ def _load(self, **kwargs: Any):
self._check_gitignored()
return {}, ""

@property
def latest_version_info(self):
version = LOCKFILE_VERSION.V2.value # pylint:disable=no-member
return {SCHEMA_KWD: version}

def dump(self, stage, **kwargs):
stage_data = serialize.to_lockfile(stage, **kwargs)

with modify_yaml(self.path, fs=self.repo.fs) as data:
version = LOCKFILE_VERSION.from_dict(data)
if version == LOCKFILE_VERSION.V1:
logger.info("Migrating lock file '%s' from v1 to v2", self.relpath)
migrate_lock_v1_to_v2(data, self.latest_version_info)
elif not data:
data.update(self.latest_version_info)
if not data:
data.update({"schema": "2.0"})
# order is important, meta should always be at the top
logger.info("Generating lock file '%s'", self.relpath)

Expand All @@ -436,8 +398,7 @@ def remove_stage(self, stage):
return

d, _ = self._load_yaml(round_trip=True)
version = LOCKFILE_VERSION.from_dict(d)
data = d if version == LOCKFILE_VERSION.V1 else d.get("stages", {})
data = d.get("stages", {})
if stage.name not in data:
return

Expand Down
39 changes: 0 additions & 39 deletions dvc/parsing/versions.py

This file was deleted.

12 changes: 4 additions & 8 deletions dvc/schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from collections.abc import Mapping

from voluptuous import Any, Optional, Required, Schema
from voluptuous import Any, Equal, Optional, Required, Schema

from dvc import dependency, output
from dvc.annotations import ANNOTATION_SCHEMA, ARTIFACT_SCHEMA
Expand All @@ -12,7 +12,6 @@
Output,
)
from dvc.parsing import DO_KWD, FOREACH_KWD, VARS_KWD
from dvc.parsing.versions import SCHEMA_KWD, lockfile_version_schema
from dvc.stage.params import StageParams

STAGES = "stages"
Expand Down Expand Up @@ -44,11 +43,9 @@
}

LOCKFILE_STAGES_SCHEMA = {str: LOCK_FILE_STAGE_SCHEMA}

LOCKFILE_V1_SCHEMA = LOCKFILE_STAGES_SCHEMA
LOCKFILE_V2_SCHEMA = {
LOCKFILE_SCHEMA = {
Required("schema"): Equal("2.0", "invalid schema version"),
STAGES: LOCKFILE_STAGES_SCHEMA,
Required(SCHEMA_KWD): lockfile_version_schema,
}

OUT_PSTAGE_DETAILED_SCHEMA = {
Expand Down Expand Up @@ -135,5 +132,4 @@ def validator(data):
COMPILED_SINGLE_STAGE_SCHEMA = Schema(SINGLE_STAGE_SCHEMA)
COMPILED_MULTI_STAGE_SCHEMA = Schema(MULTI_STAGE_SCHEMA)
COMPILED_LOCK_FILE_STAGE_SCHEMA = Schema(LOCK_FILE_STAGE_SCHEMA)
COMPILED_LOCKFILE_V1_SCHEMA = Schema(LOCKFILE_V1_SCHEMA)
COMPILED_LOCKFILE_V2_SCHEMA = Schema(LOCKFILE_V2_SCHEMA)
COMPILED_LOCKFILE_SCHEMA = Schema(LOCKFILE_SCHEMA)
7 changes: 1 addition & 6 deletions dvc/stage/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from dvc import dependency, output
from dvc.parsing import FOREACH_KWD, JOIN, EntryNotFound
from dvc.parsing.versions import LOCKFILE_VERSION
from dvc.utils.objects import cached_property
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.meta import Meta
Expand Down Expand Up @@ -39,11 +38,7 @@ def __init__(
self.repo = self.dvcfile.repo

lockfile_data = lockfile_data or {}
version = LOCKFILE_VERSION.from_dict(lockfile_data)
if version == LOCKFILE_VERSION.V1:
self._lockfile_data = lockfile_data
else:
self._lockfile_data = lockfile_data.get("stages", {})
self._lockfile_data = lockfile_data.get("stages", {})

@cached_property
def lockfile_data(self) -> Dict[str, Any]:
Expand Down
62 changes: 1 addition & 61 deletions tests/func/test_lockfile.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
import logging
import os
from collections import OrderedDict
from operator import itemgetter

import pytest

from dvc.dvcfile import LOCK_FILE, Lockfile
from dvc.dvcfile import LOCK_FILE
from dvc.stage.utils import split_params_deps
from dvc.utils.fs import remove
from dvc.utils.serialize import dumps_yaml, parse_yaml_for_update
from dvc.utils.strictyaml import YAMLValidationError, make_relpath
from dvc_data.hashfile.hash_info import HashInfo
from tests.func.test_run_multistage import supported_params

FS_STRUCTURE = {
Expand Down Expand Up @@ -157,57 +151,3 @@ def test_params_dump(tmp_dir, dvc, run_head):
remove(item)
assert dvc.reproduce(stage.addressing) == [stage]
assert_eq_lockfile(initial_content, read_lock_file())


@pytest.fixture
def v1_repo_lock(tmp_dir, dvc):
    """Set up a repo whose ``dvc.lock`` is written in the v1 layout.

    Two stages (``foo`` and ``bar``) are registered without being executed,
    then a lockfile with stage names at the top level (no ``schema`` /
    ``stages`` keys) is dumped. The dumped data is returned for assertions.
    """
    # The expected output size is one byte larger on Windows
    # (presumably due to CRLF line endings from ``echo`` -- TODO confirm).
    expected_size = 5 if os.name == "nt" else 4
    md5_info = HashInfo(name="md5", value="c157a79031e1c40f85931829bc5fc552")

    # Built step by step to keep the same key order: path, hash fields, size.
    bar_out = {"path": "bar.txt"}
    bar_out.update(md5_info.to_dict())
    bar_out["size"] = expected_size

    v1_lockdata = {
        "foo": {"cmd": "echo foo"},
        "bar": {"cmd": "echo bar>bar.txt", "outs": [bar_out]},
    }

    dvc.run(cmd="echo foo", name="foo", no_exec=True)
    dvc.run(cmd="echo bar>bar.txt", outs=["bar.txt"], name="bar", no_exec=True)

    lock_path = tmp_dir / "dvc.lock"
    lock_path.dump(v1_lockdata)
    return v1_lockdata


def test_can_read_v1_lockfile(tmp_dir, dvc, v1_repo_lock):
    """``dvc.status()`` reports the expected state for a repo with a v1 lockfile."""
    expected_status = {
        "bar": [{"changed outs": {"bar.txt": "not in cache"}}],
        "foo": ["always changed"],
    }
    assert dvc.status() == expected_status


def test_migrates_v1_lockfile_to_v2_during_dump(tmp_dir, dvc, v1_repo_lock, caplog):
    """Reproducing on a v1 lockfile logs the migration and rewrites it as v2."""
    migration_message = "Migrating lock file 'dvc.lock' from v1 to v2"

    # Start from an empty log capture so only messages from reproduce() count.
    caplog.clear()
    with caplog.at_level(logging.INFO, logger="dvc.dvcfile"):
        reproduced = dvc.reproduce()
        assert reproduced

    assert migration_message in caplog.messages

    # The v1 data must now sit under "stages", alongside the schema marker.
    migrated = (tmp_dir / "dvc.lock").parse()
    assert migrated == {"stages": v1_repo_lock, "schema": "2.0"}


@pytest.mark.parametrize(
    "version_info",
    [
        {"schema": "1.1"},
        {"schema": "2.1"},
        {"schema": "3.0"},
    ],
)
def test_lockfile_invalid_versions(tmp_dir, dvc, version_info):
    """Loading a lockfile with an unsupported schema version fails validation."""
    lock_path = tmp_dir / "dvc.lock"
    # Version entries go first, followed by a minimal single-stage payload.
    lock_path.dump({**version_info, "stages": {"foo": {"cmd": "echo foo"}}})

    with pytest.raises(YAMLValidationError) as exc_info:
        Lockfile(dvc, (tmp_dir / "dvc.lock").fs_path).load()

    raised = exc_info.value
    rel = make_relpath("dvc.lock")
    assert f"'{rel}' validation failed" in str(raised)

    # The underlying cause carries the detailed schema-validation message.
    expected_cause = (
        f"invalid schema version {version_info['schema']}, "
        "expected one of ['2.0'] for dictionary value @ "
        "data['schema']"
    )
    assert str(raised.__cause__) == expected_cause
4 changes: 2 additions & 2 deletions tests/unit/test_lockfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_stage_dump_no_outs_deps(tmp_dir, dvc):

def test_stage_dump_when_already_exists(tmp_dir, dvc):
data = {"s1": {"cmd": "command", "deps": [], "outs": []}}
(tmp_dir / "path.lock").dump(data)
(tmp_dir / "path.lock").dump({"schema": "2.0", "stages": data})
stage = PipelineStage(name="s2", repo=dvc, path="path", cmd="command2")
lockfile = Lockfile(dvc, "path.lock")
lockfile.dump(stage)
Expand All @@ -35,7 +35,7 @@ def test_stage_dump_with_deps_and_outs(tmp_dir, dvc):
"outs": [{"md5": "2.txt", "path": "checksum"}],
}
}
(tmp_dir / "path.lock").dump(data)
(tmp_dir / "path.lock").dump({"schema": "2.0", "stages": data})
lockfile = Lockfile(dvc, "path.lock")
stage = PipelineStage(name="s2", repo=dvc, path="path", cmd="command2")
lockfile.dump(stage)
Expand Down

0 comments on commit 3959c51

Please sign in to comment.