Skip to content

Commit

Permalink
Restyle #3393 introduce hyper parameters and config (#3516)
Browse files Browse the repository at this point in the history
* Work in progress

* added file parsing and name validation + adjust schema

* Exceptions on bad input

* Support multiple parameters

* Support multi 's in
Having any troubles? Hit us up at https://dvc.org/support, we are always happy to help!

* Restyled by black

Co-authored-by: elgehelge <[email protected]>
Co-authored-by: Restyled.io <[email protected]>
  • Loading branch information
3 people authored Mar 19, 2020
1 parent caf5b93 commit 65d22c7
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 15 deletions.
8 changes: 8 additions & 0 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def run(self):
metrics=self.args.metrics,
metrics_no_cache=self.args.metrics_no_cache,
deps=self.args.deps,
params=self.args.params,
fname=self.args.file,
cwd=self.args.cwd,
wdir=self.args.wdir,
Expand Down Expand Up @@ -111,6 +112,13 @@ def add_parser(subparsers, parent_parser):
help="Declare output file or directory "
"(do not put into DVC cache).",
)
run_parser.add_argument(
"-p",
"--params",
action="append",
default=[],
help="Declare parameter to use as additional dependency.",
)
run_parser.add_argument(
"-m",
"--metrics",
Expand Down
27 changes: 17 additions & 10 deletions dvc/dependency/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dvc.dependency.local import DependencyLOCAL
from dvc.dependency.s3 import DependencyS3
from dvc.dependency.ssh import DependencySSH
from dvc.dependency.param import DependencyPARAMS
from dvc.output.base import OutputBase
from dvc.remote import Remote
from dvc.scheme import Schemes
Expand Down Expand Up @@ -42,37 +43,43 @@
SCHEMA = output.SCHEMA.copy()
del SCHEMA[OutputBase.PARAM_CACHE]
del SCHEMA[OutputBase.PARAM_METRIC]
SCHEMA[DependencyREPO.PARAM_REPO] = DependencyREPO.REPO_SCHEMA
SCHEMA.update(DependencyREPO.REPO_SCHEMA)
SCHEMA.update(DependencyPARAMS.PARAM_SCHEMA)


def _get(stage, p, info):
parsed = urlparse(p)
def _get_by_path(stage, path, info):
parsed = urlparse(path)

if parsed.scheme == "remote":
remote = Remote(stage.repo, name=parsed.netloc)
return DEP_MAP[remote.scheme](stage, p, info, remote=remote)
return DEP_MAP[remote.scheme](stage, path, info, remote=remote)

if info and info.get(DependencyREPO.PARAM_REPO):
repo = info.pop(DependencyREPO.PARAM_REPO)
return DependencyREPO(repo, stage, p, info)
return DependencyREPO(repo, stage, path, info)

for d in DEPS:
if d.supported(p):
return d(stage, p, info)
return DependencyLOCAL(stage, p, info)
if d.supported(path):
return d(stage, path, info)
return DependencyLOCAL(stage, path, info)


def loadd_from(stage, d_list):
ret = []
for d in d_list:
p = d.pop(OutputBase.PARAM_PATH)
ret.append(_get(stage, p, d))
ret.append(_get_by_path(stage, p, d))
return ret


def loads_from(stage, s_list, erepo=None):
ret = []
for s in s_list:
info = {DependencyREPO.PARAM_REPO: erepo} if erepo else {}
ret.append(_get(stage, s, info))
dep_obj = _get_by_path(stage, s, info)
ret.append(dep_obj)
return ret


def loads_params(stage, s_list):  # TODO: Add support for `erepo=` as well?
    """Build parameter dependencies for *stage* from parameter strings.

    Delegates to ``DependencyPARAMS.from_list`` and returns its result
    unchanged.
    """
    return DependencyPARAMS.from_list(stage, s_list)
107 changes: 107 additions & 0 deletions dvc/dependency/param.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import json
import re
from itertools import groupby

from dvc.dependency.local import DependencyLOCAL
from dvc.exceptions import DvcException


class BadParamNameError(DvcException):
    """Signals a parameter specification that failed name validation."""

    def __init__(self, param_name):
        # The offending specification is embedded verbatim in the message.
        super().__init__(
            "Parameter name '{}' is not valid".format(param_name)
        )


class BadParamFileError(DvcException):
    """Signals a parameters file whose contents could not be loaded."""

    def __init__(self, path):
        # The path of the unreadable file is embedded in the message.
        super().__init__(
            "Parameter file '{}' could not be read".format(path)
        )


class DependencyPARAMS(DependencyLOCAL):
    """Dependency on named parameters stored in a JSON parameters file.

    An instance is created from an input string of the form
    ``[<path>:]<name>[,<name>...]``.  When the ``<path>:`` prefix is
    omitted, ``DEFAULT_PARAMS_FILE`` is used.  Each name is one or more
    ``\\w+`` segments joined by dots.
    """

    # SCHEMA:
    #   params:
    #     - <parameter name>: <parameter value>
    #     - <parameter name>: <parameter value>
    PARAM_PARAMS = "params"
    PARAM_SCHEMA = {PARAM_PARAMS: {str: str}}
    FILE_DELIMITER = ":"
    PARAM_DELIMITER = ","
    DEFAULT_PARAMS_FILE = "params.json"

    REGEX_SUBNAME = r"\w+"
    REGEX_NAME = r"{sub}(\.{sub})*".format(sub=REGEX_SUBNAME)
    REGEX_MULTI_PARAMS = r"^{param}(,{param})*$".format(param=REGEX_NAME)
    REGEX_COMPILED = re.compile(REGEX_MULTI_PARAMS)

    def __init__(self, stage, input_str, *args, **kwargs):
        """Parse *input_str* and initialise the underlying file dependency.

        Args:
            stage: stage owning this dependency (forwarded to the base class).
            input_str: ``[<path>:]<name>[,<name>...]`` specification.
            *args, **kwargs: forwarded unchanged to the base class.

        Raises:
            BadParamNameError: if the parameter names are not valid.
        """
        path, param_names = self._parse_and_validate_input(input_str)
        super().__init__(stage, path, *args, **kwargs)
        # Deduplicate so that a name requested twice is tracked only once;
        # sorting keeps the representation deterministic.
        self.param_names = sorted(
            set(param_names.split(self.PARAM_DELIMITER))
        )
        # Populated by save() with the values read from the file.
        self.param_values = {}

    def __str__(self):
        """Return the ``<path>:<name>,<name>`` form of this dependency."""
        path = super().__str__()
        return self._reverse_parse_input(path, self.param_names)

    @classmethod
    def from_list(cls, stage, s_list):
        """Create one dependency per unique file referenced in *s_list*.

        Specifications pointing at the same file are merged into a single
        object.  The result is ordered by file path.

        Raises:
            BadParamNameError: if any specification has invalid names.
        """
        pathname_tuples = sorted(
            cls._parse_and_validate_input(s) for s in s_list
        )
        ret = []
        # groupby() only merges adjacent items, hence the sort above.
        for path, group in groupby(pathname_tuples, key=lambda x: x[0]):
            param_names = [names for _, names in group]
            regrouped_input = cls._reverse_parse_input(path, param_names)
            ret.append(cls(stage, regrouped_input))
        return ret

    @classmethod
    def _parse_and_validate_input(cls, input_str):
        """Split *input_str* into ``(path, param_names)`` and validate names.

        The split happens on the last ``FILE_DELIMITER``; when there is no
        path part, ``DEFAULT_PARAMS_FILE`` is returned as the path.
        ``param_names`` is returned as the raw comma-separated string.
        """
        path, _, param_names = input_str.rpartition(cls.FILE_DELIMITER)
        cls._validate_input(param_names)
        return path or cls.DEFAULT_PARAMS_FILE, param_names

    @classmethod
    def _reverse_parse_input(cls, path, param_names):
        """Join *path* and an iterable of names back into an input string."""
        return "{path}{delimiter}{params}".format(
            path=path,
            delimiter=cls.FILE_DELIMITER,
            params=cls.PARAM_DELIMITER.join(param_names),
        )

    @classmethod
    def _validate_input(cls, param_names):
        """Raise BadParamNameError unless *param_names* is a valid name list."""
        # fullmatch() rejects a trailing newline, which match() combined
        # with the "$" anchor would silently accept.
        if not cls.REGEX_COMPILED.fullmatch(param_names):
            raise BadParamNameError(param_names)

    def save(self):
        """Save the file dependency and record the tracked parameter values.

        Raises:
            KeyError: if a tracked name is not a top-level key of the file.
        """
        super().save()
        params_in_file = self._parse_file()
        self.param_values = {k: params_in_file[k] for k in self.param_names}

    def dumpd(self):
        """Return the serialisable dict holding the path and param values."""
        return {
            self.PARAM_PATH: self.def_path,
            self.PARAM_PARAMS: self.param_values,
        }

    @property
    def exists(self):
        """True when the file exists and contains every tracked parameter."""
        # Do not try to open a missing file: report "does not exist"
        # instead of letting open() raise.
        if not super().exists:
            return False
        params_in_file = self._parse_file()
        return all(name in params_in_file for name in self.param_names)

    def _parse_file(self):
        """Return the parsed JSON content of the parameters file.

        The result is cached on the instance after the first successful
        read, so later changes to the file are not picked up.

        Raises:
            BadParamFileError: if the file does not contain valid JSON.
        """
        if not hasattr(self, "_params_cache"):
            path = self.path_info.fspath
            with open(path, "r", encoding="utf-8") as fp:
                try:
                    self._params_cache = json.load(fp)
                except json.JSONDecodeError as exc:
                    # Keep the decoder error as the explicit cause.
                    raise BadParamFileError(path) from exc
        return self._params_cache
8 changes: 5 additions & 3 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ class DependencyREPO(DependencyLOCAL):
PARAM_REV_LOCK = "rev_lock"

REPO_SCHEMA = {
Required(PARAM_URL): str,
PARAM_REV: str,
PARAM_REV_LOCK: str,
PARAM_REPO: {
Required(PARAM_URL): str,
PARAM_REV: str,
PARAM_REV_LOCK: str,
}
}

def __init__(self, def_repo, stage, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -533,9 +533,11 @@ def create(repo, accompany_outs=False, **kwargs):
)

Stage._fill_stage_outputs(stage, **kwargs)
stage.deps = dependency.loads_from(
deps = dependency.loads_from(
stage, kwargs.get("deps", []), erepo=kwargs.get("erepo", None)
)
params = dependency.loads_params(stage, kwargs.get("params", []))
stage.deps = deps + params

stage._check_circular_dependency()
stage._check_duplicated_arguments()
Expand Down
6 changes: 6 additions & 0 deletions tests/basic_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class TestDirFixture(object):
# in tests, we replace foo with bar, so we need to make sure that when we
# modify a file in our tests, its content length changes.
BAR_CONTENTS = BAR + "r"
PARAMSDEFAULT = "params.json"
PARAMSDEFAULT_CONTENTS = '{"p_one": "1", "p_two": "1"}'
PARAMS = "par.json"
PARAMS_CONTENTS = '{"p_three": "3"}'
CODE = "code.py"
CODE_CONTENTS = (
"import sys\nimport shutil\n"
Expand Down Expand Up @@ -87,6 +91,8 @@ def setUp(self):
self._pushd(self._root_dir)
self.create(self.FOO, self.FOO_CONTENTS)
self.create(self.BAR, self.BAR_CONTENTS)
self.create(self.PARAMSDEFAULT, self.PARAMSDEFAULT_CONTENTS)
self.create(self.PARAMS, self.PARAMS_CONTENTS)
self.create(self.CODE, self.CODE_CONTENTS)
os.mkdir(self.DATA_DIR)
os.mkdir(self.DATA_SUB_DIR)
Expand Down
4 changes: 3 additions & 1 deletion tests/func/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class TestRun(TestDvc):
def test(self):
cmd = "python {} {} {}".format(self.CODE, self.FOO, "out")
deps = [self.FOO, self.CODE]
params = ["p_one", "p_two", "par.json:p_three"]
outs = [os.path.join(self.dvc.root_dir, "out")]
outs_no_cache = []
fname = "out.dvc"
Expand All @@ -45,6 +46,7 @@ def test(self):
cmd=cmd,
deps=deps,
outs=outs,
params=params,
outs_no_cache=outs_no_cache,
fname=fname,
cwd=cwd,
Expand All @@ -53,7 +55,7 @@ def test(self):
self.assertTrue(filecmp.cmp(self.FOO, "out", shallow=False))
self.assertTrue(os.path.isfile(stage.path))
self.assertEqual(stage.cmd, cmd)
self.assertEqual(len(stage.deps), len(deps))
self.assertEqual(len(stage.deps), len(deps) + 2)
self.assertEqual(len(stage.outs), len(outs + outs_no_cache))
self.assertEqual(stage.outs[0].fspath, outs[0])
self.assertEqual(stage.outs[0].checksum, file_md5(self.FOO)[0])
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/dependency/test_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import mock

from dvc.dependency import DependencyPARAMS
from dvc.stage import Stage
from tests.basic_env import TestDvc


class TestDependencyPARAM(TestDvc):
    """Unit tests for building parameter dependencies from input strings."""

    def test_from_list(self):
        """Specs for the same file are merged; results are ordered by path."""
        specs = ["foo", "bar,baz", "a_file:qux"]
        deps = DependencyPARAMS.from_list(Stage(self.dvc), specs)

        assert len(deps) == 2

        # Explicit file reference comes first.
        explicit = deps[0]
        assert explicit.def_path == "a_file"
        assert explicit.param_names == ["qux"]

        # Specs without a file part fall back to the default params file.
        fallback = deps[1]
        assert fallback.def_path == DependencyPARAMS.DEFAULT_PARAMS_FILE
        assert fallback.param_names == ["bar", "baz", "foo"]

0 comments on commit 65d22c7

Please sign in to comment.