Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Restyle #3393 introduce hyper parameters and config #3516

Merged
merged 6 commits into from
Mar 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def run(self):
metrics=self.args.metrics,
metrics_no_cache=self.args.metrics_no_cache,
deps=self.args.deps,
params=self.args.params,
fname=self.args.file,
cwd=self.args.cwd,
wdir=self.args.wdir,
Expand Down Expand Up @@ -111,6 +112,13 @@ def add_parser(subparsers, parent_parser):
help="Declare output file or directory "
"(do not put into DVC cache).",
)
run_parser.add_argument(
"-p",
"--params",
action="append",
default=[],
help="Declare parameter to use as additional dependency.",
)
run_parser.add_argument(
"-m",
"--metrics",
Expand Down
27 changes: 17 additions & 10 deletions dvc/dependency/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dvc.dependency.local import DependencyLOCAL
from dvc.dependency.s3 import DependencyS3
from dvc.dependency.ssh import DependencySSH
from dvc.dependency.param import DependencyPARAMS
from dvc.output.base import OutputBase
from dvc.remote import Remote
from dvc.scheme import Schemes
Expand Down Expand Up @@ -42,37 +43,43 @@
SCHEMA = output.SCHEMA.copy()
del SCHEMA[OutputBase.PARAM_CACHE]
del SCHEMA[OutputBase.PARAM_METRIC]
SCHEMA[DependencyREPO.PARAM_REPO] = DependencyREPO.REPO_SCHEMA
SCHEMA.update(DependencyREPO.REPO_SCHEMA)
SCHEMA.update(DependencyPARAMS.PARAM_SCHEMA)


def _get(stage, p, info):
parsed = urlparse(p)
def _get_by_path(stage, path, info):
parsed = urlparse(path)

if parsed.scheme == "remote":
remote = Remote(stage.repo, name=parsed.netloc)
return DEP_MAP[remote.scheme](stage, p, info, remote=remote)
return DEP_MAP[remote.scheme](stage, path, info, remote=remote)

if info and info.get(DependencyREPO.PARAM_REPO):
repo = info.pop(DependencyREPO.PARAM_REPO)
return DependencyREPO(repo, stage, p, info)
return DependencyREPO(repo, stage, path, info)

for d in DEPS:
if d.supported(p):
return d(stage, p, info)
return DependencyLOCAL(stage, p, info)
if d.supported(path):
return d(stage, path, info)
return DependencyLOCAL(stage, path, info)


def loadd_from(stage, d_list):
ret = []
for d in d_list:
p = d.pop(OutputBase.PARAM_PATH)
ret.append(_get(stage, p, d))
ret.append(_get_by_path(stage, p, d))
return ret


def loads_from(stage, s_list, erepo=None):
ret = []
for s in s_list:
info = {DependencyREPO.PARAM_REPO: erepo} if erepo else {}
ret.append(_get(stage, s, info))
dep_obj = _get_by_path(stage, s, info)
ret.append(dep_obj)
return ret


def loads_params(stage, s_list):
    """Build the parameter dependencies of ``stage`` from ``s_list``.

    Thin wrapper that hands both arguments to
    ``DependencyPARAMS.from_list`` and returns whatever it produces.
    """
    # TODO: Make support for `erepo=` as well ?
    param_deps = DependencyPARAMS.from_list(stage, s_list)
    return param_deps
107 changes: 107 additions & 0 deletions dvc/dependency/param.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import json
import re
from itertools import groupby

from dvc.dependency.local import DependencyLOCAL
from dvc.exceptions import DvcException


class BadParamNameError(DvcException):
    """Error reporting that a parameter name is not valid."""

    def __init__(self, param_name):
        # Build the message inline and hand it straight to the base class.
        super().__init__(
            "Parameter name '{}' is not valid".format(param_name)
        )


class BadParamFileError(DvcException):
    """Error reporting that a parameter file could not be read."""

    def __init__(self, path):
        # Build the message inline and hand it straight to the base class.
        super().__init__(
            "Parameter file '{}' could not be read".format(path)
        )


class DependencyPARAMS(DependencyLOCAL):
    """Dependency on named parameters stored in a JSON parameters file.

    An input string has the form ``[<path>:]<name>[,<name>...]``. When the
    ``<path>:`` prefix is omitted, ``DEFAULT_PARAMS_FILE`` is used.
    """

    # SCHEMA:
    #   params:
    #     - <parameter name>: <parameter value>
    #     - <parameter name>: <parameter value>
    PARAM_PARAMS = "params"
    PARAM_SCHEMA = {PARAM_PARAMS: {str: str}}
    FILE_DELIMITER = ":"
    PARAM_DELIMITER = ","
    DEFAULT_PARAMS_FILE = "params.json"

    # A name is one or more word-character groups joined by dots; an input
    # is one or more such names joined by commas.
    # NOTE(review): dotted names pass validation, but `save` and `exists`
    # look names up as flat top-level keys -- confirm nested lookup is not
    # expected here.
    REGEX_SUBNAME = r"\w+"
    REGEX_NAME = r"{sub}(\.{sub})*".format(sub=REGEX_SUBNAME)
    REGEX_MULTI_PARAMS = r"^{param}(,{param})*$".format(param=REGEX_NAME)
    REGEX_COMPILED = re.compile(REGEX_MULTI_PARAMS)

    def __init__(self, stage, input_str, *args, **kwargs):
        """Parse ``input_str`` and initialise the dependency for its path.

        Raises:
            BadParamNameError: if the parameter-name part of ``input_str``
                does not match ``REGEX_MULTI_PARAMS``.
        """
        path, param_names = self._parse_and_validate_input(input_str)
        super().__init__(stage, path, *args, **kwargs)
        # Sorted so the order given on input does not matter.
        self.param_names = sorted(param_names.split(self.PARAM_DELIMITER))
        # Filled in by `save`.
        self.param_values = {}

    def __str__(self):
        """Return the ``<path>:<name>,<name>...`` form of this dependency."""
        path = super().__str__()
        return self._reverse_parse_input(path, self.param_names)

    @classmethod
    def from_list(cls, stage, s_list):
        """Create one dependency per unique file referenced in ``s_list``.

        Inputs naming the same file are merged into a single dependency.
        The result is ordered by file path.
        """
        pathname_tuples = [cls._parse_and_validate_input(s) for s in s_list]
        # groupby only merges adjacent items, hence the sort.
        grouped_by_path = groupby(sorted(pathname_tuples), key=lambda x: x[0])
        ret = []
        for path, group in grouped_by_path:
            param_names = [names for _, names in group]
            regrouped_input = cls._reverse_parse_input(path, param_names)
            ret.append(cls(stage, regrouped_input))
        return ret

    @classmethod
    def _parse_and_validate_input(cls, input_str):
        """Split ``input_str`` into ``(path, param_names)`` and validate.

        ``param_names`` is returned as the raw comma-separated string.

        Raises:
            BadParamNameError: if the names part is not valid.
        """
        # Split on the LAST delimiter so a path that itself contains ':'
        # stays intact.
        path, _, param_names = input_str.rpartition(cls.FILE_DELIMITER)
        cls._validate_input(param_names)
        path = path or cls.DEFAULT_PARAMS_FILE
        return path, param_names

    @classmethod
    def _reverse_parse_input(cls, path, param_names):
        """Join ``path`` and an iterable of names back into an input string."""
        return "{path}{delimiter}{params}".format(
            path=path,
            delimiter=cls.FILE_DELIMITER,
            params=cls.PARAM_DELIMITER.join(param_names),
        )

    @classmethod
    def _validate_input(cls, param_names):
        """Raise ``BadParamNameError`` unless ``param_names`` is well formed."""
        if not cls.REGEX_COMPILED.match(param_names):
            raise BadParamNameError(param_names)

    def save(self):
        """Save the file dependency, then record the tracked values."""
        super().save()
        params_in_file = self._parse_file()
        self.param_values = {k: params_in_file[k] for k in self.param_names}

    def dumpd(self):
        """Return the serialisable form: the path plus the saved values."""
        return {
            self.PARAM_PATH: self.def_path,
            self.PARAM_PARAMS: self.param_values,
        }

    @property
    def exists(self):
        """True when the file exists and holds every tracked parameter."""
        # Check the file first: parsing a missing file would raise from
        # open() instead of reporting that the dependency does not exist.
        if not super().exists:
            return False
        params_in_file = self._parse_file()
        return all(name in params_in_file for name in self.param_names)

    def _parse_file(self):
        """Return the parsed JSON content of the parameters file.

        The content is parsed once and then reused for this instance.

        Raises:
            BadParamFileError: if the file does not contain valid JSON.
        """
        try:
            return self._params_cache
        except AttributeError:
            pass  # Not parsed yet; fall through to read the file.

        # Read outside the handler above so that read/parse failures are
        # not reported as happening while handling an AttributeError.
        path = self.path_info.fspath
        with open(path, "r") as fp:
            try:
                params = json.load(fp)
            except json.JSONDecodeError as exc:
                raise BadParamFileError(path) from exc
        self._params_cache = params
        return params
8 changes: 5 additions & 3 deletions dvc/dependency/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ class DependencyREPO(DependencyLOCAL):
PARAM_REV_LOCK = "rev_lock"

REPO_SCHEMA = {
Required(PARAM_URL): str,
PARAM_REV: str,
PARAM_REV_LOCK: str,
PARAM_REPO: {
Required(PARAM_URL): str,
PARAM_REV: str,
PARAM_REV_LOCK: str,
}
}

def __init__(self, def_repo, stage, *args, **kwargs):
Expand Down
4 changes: 3 additions & 1 deletion dvc/stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,9 +530,11 @@ def create(repo, accompany_outs=False, **kwargs):
)

Stage._fill_stage_outputs(stage, **kwargs)
stage.deps = dependency.loads_from(
deps = dependency.loads_from(
stage, kwargs.get("deps", []), erepo=kwargs.get("erepo", None)
)
params = dependency.loads_params(stage, kwargs.get("params", []))
stage.deps = deps + params

stage._check_circular_dependency()
stage._check_duplicated_arguments()
Expand Down
6 changes: 6 additions & 0 deletions tests/basic_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ class TestDirFixture(object):
# in tests, we replace foo with bar, so we need to make sure that when we
# modify a file in our tests, its content length changes.
BAR_CONTENTS = BAR + "r"
PARAMSDEFAULT = "params.json"
PARAMSDEFAULT_CONTENTS = '{"p_one": "1", "p_two": "1"}'
PARAMS = "par.json"
PARAMS_CONTENTS = '{"p_three": "3"}'
CODE = "code.py"
CODE_CONTENTS = (
"import sys\nimport shutil\n"
Expand Down Expand Up @@ -87,6 +91,8 @@ def setUp(self):
self._pushd(self._root_dir)
self.create(self.FOO, self.FOO_CONTENTS)
self.create(self.BAR, self.BAR_CONTENTS)
self.create(self.PARAMSDEFAULT, self.PARAMSDEFAULT_CONTENTS)
self.create(self.PARAMS, self.PARAMS_CONTENTS)
self.create(self.CODE, self.CODE_CONTENTS)
os.mkdir(self.DATA_DIR)
os.mkdir(self.DATA_SUB_DIR)
Expand Down
4 changes: 3 additions & 1 deletion tests/func/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class TestRun(TestDvc):
def test(self):
cmd = "python {} {} {}".format(self.CODE, self.FOO, "out")
deps = [self.FOO, self.CODE]
params = ["p_one", "p_two", "par.json:p_three"]
outs = [os.path.join(self.dvc.root_dir, "out")]
outs_no_cache = []
fname = "out.dvc"
Expand All @@ -45,6 +46,7 @@ def test(self):
cmd=cmd,
deps=deps,
outs=outs,
params=params,
outs_no_cache=outs_no_cache,
fname=fname,
cwd=cwd,
Expand All @@ -53,7 +55,7 @@ def test(self):
self.assertTrue(filecmp.cmp(self.FOO, "out", shallow=False))
self.assertTrue(os.path.isfile(stage.path))
self.assertEqual(stage.cmd, cmd)
self.assertEqual(len(stage.deps), len(deps))
self.assertEqual(len(stage.deps), len(deps) + 2)
self.assertEqual(len(stage.outs), len(outs + outs_no_cache))
self.assertEqual(stage.outs[0].fspath, outs[0])
self.assertEqual(stage.outs[0].checksum, file_md5(self.FOO)[0])
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/dependency/test_params.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import mock

from dvc.dependency import DependencyPARAMS
from dvc.stage import Stage
from tests.basic_env import TestDvc


class TestDependencyPARAM(TestDvc):
    """Unit tests for ``DependencyPARAMS``."""

    def test_from_list(self):
        """Inputs naming the same file collapse into a single dependency."""
        inputs = ["foo", "bar,baz", "a_file:qux"]
        deps = DependencyPARAMS.from_list(Stage(self.dvc), inputs)

        assert len(deps) == 2
        explicit_file_dep, default_file_dep = deps

        # The input that names its own file.
        assert explicit_file_dep.def_path == "a_file"
        assert explicit_file_dep.param_names == ["qux"]

        # The two inputs without a file are merged under the default file.
        assert default_file_dep.def_path == DependencyPARAMS.DEFAULT_PARAMS_FILE
        assert default_file_dep.param_names == ["bar", "baz", "foo"]