diff --git a/dvc/cli.py b/dvc/cli.py index 17d43a974b..29ccc4613d 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -23,6 +23,7 @@ ls, lock, metrics, + params, move, pipeline, remote, @@ -62,6 +63,7 @@ remote, cache, metrics, + params, install, root, ls, diff --git a/dvc/command/metrics.py b/dvc/command/metrics.py index 0f07bffc12..d5ba8c84ff 100644 --- a/dvc/command/metrics.py +++ b/dvc/command/metrics.py @@ -114,21 +114,9 @@ def run(self): def _show_diff(diff): - from texttable import Texttable + from dvc.utils.diff import table - if not diff: - return "" - - table = Texttable() - - # disable automatic formatting - table.set_cols_dtype(["t", "t", "t", "t"]) - - # remove borders to make it easier for users to copy stuff - table.set_chars(("", "", "", "")) - table.set_deco(0) - - rows = [["Path", "Metric", "Value", "Change"]] + rows = [] for fname, mdiff in diff.items(): for metric, change in mdiff.items(): rows.append( @@ -139,8 +127,8 @@ def _show_diff(diff): change.get("diff", "diff not supported"), ] ) - table.add_rows(rows) - return table.draw() + + return table(["Path", "Metric", "Value", "Change"], rows) class CmdMetricsDiff(CmdBase): @@ -160,7 +148,9 @@ def run(self): logger.info(json.dumps(diff)) else: - logger.info(_show_diff(diff)) + table = _show_diff(diff) + if table: + logger.info(table) except DvcException: logger.exception("failed to show metrics diff") diff --git a/dvc/command/params.py b/dvc/command/params.py new file mode 100644 index 0000000000..aafffb2637 --- /dev/null +++ b/dvc/command/params.py @@ -0,0 +1,90 @@ +import argparse +import logging + +from dvc.command.base import append_doc_link +from dvc.command.base import CmdBase +from dvc.command.base import fix_subparsers +from dvc.exceptions import DvcException + + +logger = logging.getLogger(__name__) + + +def _show_diff(diff): + from dvc.utils.diff import table + + rows = [] + for fname, mdiff in diff.items(): + for param, change in mdiff.items(): + rows.append([fname, param, change["old"], change["new"]]) + + return table(["Path", "Param", "Old", "New"], rows) + + +class CmdParamsDiff(CmdBase): + def run(self): + try: + diff = self.repo.params.diff( + a_rev=self.args.a_rev, b_rev=self.args.b_rev, + ) + + if self.args.show_json: + import json + + logger.info(json.dumps(diff)) + else: + table = _show_diff(diff) + if table: + logger.info(table) + + except DvcException: + logger.exception("failed to show params diff") + return 1 + + return 0 + + +def add_parser(subparsers, parent_parser): + PARAMS_HELP = "Commands to display params." + + params_parser = subparsers.add_parser( + "params", + parents=[parent_parser], + description=append_doc_link(PARAMS_HELP, "params"), + help=PARAMS_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + params_subparsers = params_parser.add_subparsers( + dest="cmd", + help="Use `dvc params CMD --help` to display command-specific help.", + ) + + fix_subparsers(params_subparsers) + + PARAMS_DIFF_HELP = ( + "Show changes in params between commits in the DVC repository, or " + "between a commit and the workspace." + ) + params_diff_parser = params_subparsers.add_parser( + "diff", + parents=[parent_parser], + description=append_doc_link(PARAMS_DIFF_HELP, "params/diff"), + help=PARAMS_DIFF_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + params_diff_parser.add_argument( + "a_rev", nargs="?", help="Old Git commit to compare (defaults to HEAD)" + ) + params_diff_parser.add_argument( + "b_rev", + nargs="?", + help=("New Git commit to compare (defaults to the current workspace)"), + ) + params_diff_parser.add_argument( + "--show-json", + action="store_true", + default=False, + help="Show output in JSON format.", + ) + params_diff_parser.set_defaults(func=CmdParamsDiff) diff --git a/dvc/dependency/param.py b/dvc/dependency/param.py index bdb0a06524..3e16c12422 100644 --- a/dvc/dependency/param.py +++ b/dvc/dependency/param.py @@ -52,7 +52,7 @@ def status(self): return status status = defaultdict(dict) - info = self._get_info() + info = self.read_params() for param in self.params: if param not in info.keys(): st = "deleted" @@ -74,7 +74,7 @@ def dumpd(self): self.PARAM_PARAMS: self.info or self.params, } - def _get_info(self): + def read_params(self): if not self.exists: return {} @@ -95,7 +95,7 @@ def _get_info(self): return ret def save_info(self): - info = self._get_info() + info = self.read_params() missing_params = set(self.params) - set(info.keys()) if missing_params: diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index e7f2b4dfd3..e22b996749 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -67,6 +67,7 @@ def __init__(self, root_dir=None): from dvc.cache import Cache from dvc.data_cloud import DataCloud from dvc.repo.metrics import Metrics + from dvc.repo.params import Params from dvc.scm.tree import WorkingTree from dvc.repo.tag import Tag from dvc.utils.fs import makedirs @@ -102,6 +103,7 @@ def __init__(self, root_dir=None): self.cloud = DataCloud(self) self.metrics = Metrics(self) + self.params = Params(self) self.tag = Tag(self) self._ignore() diff --git a/dvc/repo/metrics/diff.py b/dvc/repo/metrics/diff.py index ef25d09d87..697afeae25 100644 --- a/dvc/repo/metrics/diff.py +++ b/dvc/repo/metrics/diff.py @@ -1,80 +1,7 @@ -import json -from collections import defaultdict - -from flatten_json import flatten - +import dvc.utils.diff from dvc.exceptions import NoMetricsError -def _parse(raw): - if raw is None or isinstance(raw, (dict, list, int, float)): - return raw - - assert isinstance(raw, str) - try: - return json.loads(raw) - except json.JSONDecodeError: - return raw - - -def _diff_vals(old, new): - if ( - isinstance(new, list) - and isinstance(old, list) - and len(old) == len(new) == 1 - ): - return _diff_vals(old[0], new[0]) - - if old == new: - return {} - - res = {"old": old, "new": new} - if isinstance(new, (int, float)) and isinstance(old, (int, float)): - res["diff"] = new - old - return res - - -def _flatten(d): - if not d: - return defaultdict(lambda: None) - - if isinstance(d, dict): - return defaultdict(lambda: None, flatten(d, ".")) - - return defaultdict(lambda: "unable to parse") - - -def _diff_dicts(old_dict, new_dict): - new = _flatten(new_dict) - old = _flatten(old_dict) - - res = defaultdict(dict) - - xpaths = set(old.keys()) - xpaths.update(set(new.keys())) - for xpath in xpaths: - old_val = old[xpath] - new_val = new[xpath] - val_diff = _diff_vals(old_val, new_val) - if val_diff: - res[xpath] = val_diff - return dict(res) - - -def _diff(old_raw, new_raw): - old = _parse(old_raw) - new = _parse(new_raw) - - if isinstance(new, dict) or isinstance(old, dict): - return _diff_dicts(old, new) - - val_diff = _diff_vals(old, new) - if val_diff: - return {"": val_diff} - - return {} - - def _get_metrics(repo, *args, rev=None, **kwargs): try: metrics = repo.metrics.show( @@ -89,12 +16,4 @@ def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): old = _get_metrics(repo, *args, **kwargs, rev=(a_rev or "HEAD")) new = _get_metrics(repo, *args, **kwargs, rev=b_rev) - paths = set(old.keys()) - paths.update(set(new.keys())) - - res = defaultdict(dict) - for path in paths: - path_diff = _diff(old.get(path), new.get(path)) - if path_diff: - res[path] = path_diff - return dict(res) + return dvc.utils.diff.diff(old, new) diff --git a/dvc/repo/params/__init__.py b/dvc/repo/params/__init__.py new file mode 100644 index 0000000000..3f7bc4797c --- /dev/null +++ b/dvc/repo/params/__init__.py @@ -0,0 +1,13 @@ +class Params(object): + def __init__(self, repo): + self.repo = repo + + def show(self, *args, **kwargs): + from .show import show + + return show(self.repo, *args, **kwargs) + + def diff(self, *args, **kwargs): + from .diff import diff + + return diff(self.repo, *args, **kwargs) diff --git a/dvc/repo/params/diff.py b/dvc/repo/params/diff.py new file mode 100644 index 0000000000..753fec5e41 --- /dev/null +++ b/dvc/repo/params/diff.py @@ -0,0 +1,30 @@ +import dvc.utils.diff +from .show import NoParamsError + + +def _get_params(repo, *args, rev=None, **kwargs): + try: + params = repo.params.show(*args, **kwargs, revs=[rev] if rev else None) + return params.get(rev or "", {}) + except NoParamsError: + return {} + + +def _format(params): + ret = {} + for key, val in params.items(): + if isinstance(val, dict): + new_val = _format(val) + elif isinstance(val, list): + new_val = str(val) + else: + new_val = val + ret[key] = new_val + return ret + + +def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): + old = _get_params(repo, *args, **kwargs, rev=(a_rev or "HEAD")) + new = _get_params(repo, *args, **kwargs, rev=b_rev) + + return dvc.utils.diff.diff(_format(old), _format(new)) diff --git a/dvc/repo/params/show.py b/dvc/repo/params/show.py new file mode 100644 index 0000000000..69d83ca06e --- /dev/null +++ b/dvc/repo/params/show.py @@ -0,0 +1,67 @@ +import copy + +from dvc.repo import locked +from dvc.exceptions import DvcException +from dvc.dependency.param import DependencyPARAMS + + +class NoParamsError(DvcException): + pass + + +def _collect_params(repo): + configs = {} + for stage in repo.stages: + for dep in stage.deps: + if not isinstance(dep, DependencyPARAMS): + continue + + if dep.path_info not in configs.keys(): + configs[dep.path_info] = copy.copy(dep) + continue + + existing = set(configs[dep.path_info].params) + new = set(dep.params) + configs[dep.path_info].params = list(existing.update(new)) + + return configs.values() + + +def _read_params(deps): + res = {} + for dep in deps: + assert dep.scheme == "local" + + params = dep.read_params() + if not params: + continue + + res[str(dep.path_info)] = params + + return res + + +@locked +def show(repo, revs=None): + res = {} + + for branch in repo.brancher(revs=revs): + entries = _collect_params(repo) + params = _read_params(entries) + + if params: + res[branch] = params + + if not res: + raise NoParamsError("no parameter configs files in this repository") + + # Hide working tree params if they are the same as in the active branch + try: + active_branch = repo.scm.active_branch() + except TypeError: + pass # Detached head + else: + if res.get("working tree") == res.get(active_branch): + res.pop("working tree", None) + + return res diff --git a/dvc/utils/diff.py b/dvc/utils/diff.py new file mode 100644 index 0000000000..35973491af --- /dev/null +++ b/dvc/utils/diff.py @@ -0,0 +1,105 @@ +import json +from collections import defaultdict + +from flatten_json import flatten + + +def _parse(raw): + if raw is None or isinstance(raw, (dict, list, int, float)): + return raw + + assert isinstance(raw, str) + try: + return json.loads(raw) + except json.JSONDecodeError: + return raw + + +def _diff_vals(old, new): + if ( + isinstance(new, list) + and isinstance(old, list) + and len(old) == len(new) == 1 + ): + return _diff_vals(old[0], new[0]) + + if old == new: + return {} + + res = {"old": old, "new": new} + if isinstance(new, (int, float)) and isinstance(old, (int, float)): + res["diff"] = new - old + return res + + +def _flatten(d): + if not d: + return defaultdict(lambda: None) + + if isinstance(d, dict): + return defaultdict(lambda: None, flatten(d, ".")) + + return defaultdict(lambda: "unable to parse") + + +def _diff_dicts(old_dict, new_dict): + new = _flatten(new_dict) + old = _flatten(old_dict) + + res = defaultdict(dict) + + xpaths = set(old.keys()) + xpaths.update(set(new.keys())) + for xpath in xpaths: + old_val = old[xpath] + new_val = new[xpath] + val_diff = _diff_vals(old_val, new_val) + if val_diff: + res[xpath] = val_diff + return dict(res) + + +def _diff(old_raw, new_raw): + old = _parse(old_raw) + new = _parse(new_raw) + + if isinstance(new, dict) or isinstance(old, dict): + return _diff_dicts(old, new) + + val_diff = _diff_vals(old, new) + if val_diff: + return {"": val_diff} + + return {} + + +def diff(old, new): + paths = set(old.keys()) + paths.update(set(new.keys())) + + res = defaultdict(dict) + for path in paths: + path_diff = _diff(old.get(path), new.get(path)) + if path_diff: + res[path] = path_diff + return dict(res) + + +def table(header, rows): + from texttable import Texttable + + if not rows: + return "" + + t = Texttable() + + # disable automatic formatting + t.set_cols_dtype(["t"] * len(header)) + + # remove borders to make it easier for users to copy stuff + t.set_chars([""] * len(header)) + t.set_deco(0) + + t.add_rows([header] + rows) + + return t.draw() diff --git a/tests/func/params/__init__.py b/tests/func/params/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/func/params/test_diff.py b/tests/func/params/test_diff.py new file mode 100644 index 0000000000..1645c46e8b --- /dev/null +++ b/tests/func/params/test_diff.py @@ -0,0 +1,87 @@ +def test_diff_no_params(tmp_dir, scm, dvc): + assert dvc.params.diff() == {} + + +def test_diff_no_changes(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("bar") + assert dvc.params.diff() == {} + + +def test_diff(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("bar") + + tmp_dir.scm_gen("params.yaml", "foo: baz", commit="baz") + tmp_dir.scm_gen("params.yaml", "foo: qux", commit="qux") + + assert dvc.params.diff(a_rev="HEAD~2") == { + "params.yaml": {"foo": {"old": "bar", "new": "qux"}} + } + + +def test_diff_new(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + + assert dvc.params.diff() == { + "params.yaml": {"foo": {"old": None, "new": "bar"}} + } + + +def test_diff_deleted(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("bar") + + (tmp_dir / "Dvcfile").unlink() + + assert dvc.params.diff() == { + "params.yaml": {"foo": {"old": "bar", "new": None}} + } + + +def test_diff_deleted_config(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("bar") + + (tmp_dir / "params.yaml").unlink() + + assert dvc.params.diff() == { + "params.yaml": {"foo": {"old": "bar", "new": None}} + } + + +def test_diff_list(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo:\n- bar\n- baz") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("foo") + + tmp_dir.gen("params.yaml", "foo:\n- bar\n- baz\n- qux") + + assert dvc.params.diff() == { + "params.yaml": { + "foo": {"old": "['bar', 'baz']", "new": "['bar', 'baz', 'qux']"} + } + } + + +def test_diff_dict(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo:\n bar: baz") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("foo") + + tmp_dir.gen("params.yaml", "foo:\n bar: qux") + + assert dvc.params.diff() == { + "params.yaml": {"foo.bar": {"old": "baz", "new": "qux"}} + } diff --git a/tests/func/params/test_show.py b/tests/func/params/test_show.py new file mode 100644 index 0000000000..51a2749720 --- /dev/null +++ b/tests/func/params/test_show.py @@ -0,0 +1,35 @@ +import pytest + +from dvc.repo.params.show import NoParamsError + + +def test_show_empty(dvc): + with pytest.raises(NoParamsError): + dvc.params.show() + + +def test_show(tmp_dir, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + assert dvc.params.show() == {"": {"params.yaml": {"foo": "bar"}}} + + +def test_show_list(tmp_dir, dvc): + tmp_dir.gen("params.yaml", "foo:\n- bar\n- baz\n") + dvc.run(params=["foo"]) + assert dvc.params.show() == {"": {"params.yaml": {"foo": ["bar", "baz"]}}} + + +def test_show_branch(tmp_dir, scm, dvc): + tmp_dir.gen("params.yaml", "foo: bar") + dvc.run(params=["foo"]) + scm.add(["params.yaml", "Dvcfile"]) + scm.commit("init") + + with tmp_dir.branch("branch", new=True): + tmp_dir.scm_gen("params.yaml", "foo: baz", commit="branch") + + assert dvc.params.show(revs=["branch"]) == { + "working tree": {"params.yaml": {"foo": "bar"}}, + "branch": {"params.yaml": {"foo": "baz"}}, + } diff --git a/tests/unit/command/test_params.py b/tests/unit/command/test_params.py new file mode 100644 index 0000000000..fe0420fc3b --- /dev/null +++ b/tests/unit/command/test_params.py @@ -0,0 +1,85 @@ +import logging + +from dvc.cli import parse_args +from dvc.command.params import CmdParamsDiff, _show_diff + + +def test_params_diff(dvc, mocker): + cli_args = parse_args( + ["params", "diff", "HEAD~10", "HEAD~1", "--show-json"] + ) + assert cli_args.func == CmdParamsDiff + + cmd = cli_args.func(cli_args) + m = mocker.patch("dvc.repo.params.diff.diff", return_value={}) + + assert cmd.run() == 0 + + m.assert_called_once_with( + cmd.repo, a_rev="HEAD~10", b_rev="HEAD~1", + ) + + +def test_params_diff_changed(): + assert _show_diff({"params.yaml": {"a.b.c": {"old": 1, "new": 2}}}) == ( + " Path Param Old New\n" "params.yaml a.b.c 1 2 " + ) + + +def test_params_diff_list(): + assert _show_diff( + {"params.yaml": {"a.b.c": {"old": 1, "new": [2, 3]}}} + ) == ( + " Path Param Old New \n" + "params.yaml a.b.c 1 [2, 3]" + ) + + +def test_params_diff_unchanged(): + assert _show_diff( + {"params.yaml": {"a.b.d": {"old": "old", "new": "new"}}} + ) == ( + " Path Param Old New\n" "params.yaml a.b.d old new" + ) + + +def test_params_diff_no_changes(): + assert _show_diff({}) == "" + + +def test_params_diff_new(): + assert _show_diff( + {"params.yaml": {"a.b.d": {"old": None, "new": "new"}}} + ) == ( + " Path Param Old New\n" "params.yaml a.b.d None new" + ) + + +def test_params_diff_deleted(): + assert _show_diff( + {"params.yaml": {"a.b.d": {"old": "old", "new": None}}} + ) == ( + " Path Param Old New \n" "params.yaml a.b.d old None" + ) + + +def test_params_diff_prec(): + assert _show_diff( + {"params.yaml": {"train.lr": {"old": 0.0042, "new": 0.0043}}} + ) == ( + " Path Param Old New \n" + "params.yaml train.lr 0.0042 0.0043" + ) + + +def test_params_diff_show_json(dvc, mocker, caplog): + cli_args = parse_args( + ["params", "diff", "HEAD~10", "HEAD~1", "--show-json"] + ) + cmd = cli_args.func(cli_args) + mocker.patch( + "dvc.repo.params.diff.diff", return_value={"params.yaml": {"a": "b"}} + ) + with caplog.at_level(logging.INFO, logger="dvc"): + assert cmd.run() == 0 + assert '{"params.yaml": {"a": "b"}}\n' in caplog.text diff --git a/tests/unit/dependency/test_params.py b/tests/unit/dependency/test_params.py index f9867a81a7..d6f3f199d6 100644 --- a/tests/unit/dependency/test_params.py +++ b/tests/unit/dependency/test_params.py @@ -57,24 +57,24 @@ def test_dumpd_without_info(dvc): } -def test_get_info_nonexistent_file(dvc): +def test_read_params_nonexistent_file(dvc): dep = DependencyPARAMS(Stage(dvc), None, ["foo"]) - assert dep._get_info() == {} + assert dep.read_params() == {} -def test_get_info_unsupported_format(tmp_dir, dvc): +def test_read_params_unsupported_format(tmp_dir, dvc): tmp_dir.gen("params.yaml", b"\0\1\2\3\4\5\6\7") dep = DependencyPARAMS(Stage(dvc), None, ["foo"]) with pytest.raises(BadParamFileError): - dep._get_info() + dep.read_params() -def test_get_info_nested(tmp_dir, dvc): +def test_read_params_nested(tmp_dir, dvc): tmp_dir.gen( "params.yaml", yaml.dump({"some": {"path": {"foo": ["val1", "val2"]}}}) ) dep = DependencyPARAMS(Stage(dvc), None, ["some.path.foo"]) - assert dep._get_info() == {"some.path.foo": ["val1", "val2"]} + assert dep.read_params() == {"some.path.foo": ["val1", "val2"]} def test_save_info_missing_config(dvc):