From f7af84f877dcdb562db0485e4dc61457edfe51dd Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Wed, 8 Jul 2020 16:39:47 +0900 Subject: [PATCH 01/15] experiments: add initial experiments.show * only includes metrics + params (no code/data information) --- dvc/repo/__init__.py | 3 +++ dvc/repo/experiments/__init__.py | 11 ++++++++ dvc/repo/experiments/show.py | 45 ++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) create mode 100644 dvc/repo/experiments/__init__.py create mode 100644 dvc/repo/experiments/show.py diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index a1c3bc7554..b6e925612e 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -74,6 +74,7 @@ def __init__(self, root_dir=None, scm=None, rev=None): from dvc.scm import SCM from dvc.cache import Cache from dvc.data_cloud import DataCloud + from dvc.repo.experiments import Experiments from dvc.repo.metrics import Metrics from dvc.repo.plots import Plots from dvc.repo.params import Params @@ -126,6 +127,8 @@ def __init__(self, root_dir=None, scm=None, rev=None): self.plots = Plots(self) self.params = Params(self) + self.experiments = Experiments(self) + self._ignore() @property diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py new file mode 100644 index 0000000000..9a0a07db87 --- /dev/null +++ b/dvc/repo/experiments/__init__.py @@ -0,0 +1,11 @@ +class Experiments: + def __init__(self, repo): + self.repo = repo + + def show(self, *args, **kwargs): + from dvc.repo.experiments.show import show + + return show(self.repo, *args, **kwargs) + + def list(self, *args, **kwargs): + pass diff --git a/dvc/repo/experiments/show.py b/dvc/repo/experiments/show.py new file mode 100644 index 0000000000..a63e5191d7 --- /dev/null +++ b/dvc/repo/experiments/show.py @@ -0,0 +1,45 @@ +import logging +from collections import defaultdict + +from dvc.exceptions import DvcException +from dvc.repo import locked + +logger = logging.getLogger(__name__) + + +@locked +def 
show( + repo, all_branches=False, all_tags=False, revs=None, all_commits=False +): + from dvc.repo.metrics.show import _collect_metrics, _read_metrics + from dvc.repo.params.show import _collect_configs, _read_params + + res = defaultdict(dict) + for rev in repo.brancher( + revs=revs, + all_branches=all_branches, + all_tags=all_tags, + all_commits=all_commits, + ): + configs = _collect_configs(repo) + params = _read_params(repo, configs, rev) + if params: + res[rev]["params"] = params + + metrics = _collect_metrics(repo, None, False) + vals = _read_metrics(repo, metrics, rev) + if vals: + res[rev]["metrics"] = vals + + if not res: + raise DvcException("no metrics or params in this repository") + + try: + active_branch = repo.scm.active_branch() + except TypeError: + pass # Detached head + else: + if res.get("workspace") == res.get(active_branch): + res.pop("workspace", None) + + return res From 0c69614ce9378c9adc2b2875cd7c8561c6f78338 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Wed, 8 Jul 2020 17:37:45 +0900 Subject: [PATCH 02/15] experiments: basic `dvc experiments show` functionality --- dvc/cli.py | 2 + dvc/command/experiments.py | 179 +++++++++++++++++++++++++++++++++++++ setup.py | 1 + 3 files changed, 182 insertions(+) create mode 100644 dvc/command/experiments.py diff --git a/dvc/cli.py b/dvc/cli.py index 44af80d6aa..1392332040 100644 --- a/dvc/cli.py +++ b/dvc/cli.py @@ -15,6 +15,7 @@ data_sync, destroy, diff, + experiments, freeze, gc, get, @@ -77,6 +78,7 @@ update, git_hook, plots, + experiments, ] diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py new file mode 100644 index 0000000000..4bbc24c2cb --- /dev/null +++ b/dvc/command/experiments.py @@ -0,0 +1,179 @@ +import argparse +import io +import logging + +from dvc.command.base import CmdBase, append_doc_link, fix_subparsers +from dvc.exceptions import DvcException + +logger = logging.getLogger(__name__) + + +def _update_names(names, items): + from flatten_json import flatten + 
+ for name, item in items: + if isinstance(item, dict): + item = flatten(item, ".") + names.update(item.keys()) + else: + names.add(name) + + +def _collect_names(experiments): + metric_names = set() + param_names = set() + + for exp in experiments.values(): + _update_names(metric_names, exp.get("metrics", {}).items()) + _update_names(param_names, exp.get("params", {}).items()) + + return sorted(metric_names), sorted(param_names) + + +def _collect_rows( + experiments, metric_names, param_names, include_rev=False, precision=None +): + from flatten_json import flatten + from dvc.command.metrics import DEFAULT_PRECISION + + if precision is None: + precision = DEFAULT_PRECISION + + def _round(val): + if isinstance(val, float): + return round(val, precision) + + return val + + def _extend(row, names, items): + for fname, item in items: + if isinstance(item, dict): + item = flatten(item, ".") + else: + item = {fname: item} + for name in names: + if name in item: + row.append(str(_round(item[name]))) + else: + row.append("-") + + for rev, exp in experiments.items(): + row = [] + if include_rev: + row.append(rev) + else: + row.append(None) + + _extend(row, metric_names, exp.get("metrics", {}).items()) + _extend(row, param_names, exp.get("params", {}).items()) + + yield row + + +def _show_experiments( + experiments, + all_branches=False, + all_tags=False, + all_commits=False, + precision=None, +): + from rich.console import Console + from rich.table import Table + from dvc.utils.pager import pager + + metric_names, param_names = _collect_names(experiments) + include_rev = all_branches or all_tags or all_commits + + table = Table(show_lines=True) + table.add_column("Commit") + for name in metric_names: + table.add_column(name, justify="right") + for name in param_names: + table.add_column(name, justify="left") + + for row in _collect_rows( + experiments, + metric_names, + param_names, + include_rev=include_rev, + precision=precision, + ): + table.add_row(*row) + + # Note: 
rich does not currently include a native way to force infinite + # width for use with a pager + console = Console(file=io.StringIO(), force_terminal=True, width=9999) + console.print(table) + pager(console.file.getvalue()) + + +class CmdExperimentsShow(CmdBase): + def run(self): + try: + experiments = self.repo.experiments.show( + all_branches=self.args.all_branches, + all_tags=self.args.all_tags, + all_commits=self.args.all_commits, + ) + + _show_experiments( + experiments, + self.args.all_branches, + self.args.all_tags, + self.args.all_commits, + ) + except DvcException: + logger.exception("failed to show experiments") + return 1 + + return 0 + + +def add_parser(subparsers, parent_parser): + EXPERIMENTS_HELP = "Commands to display and compare experiments." + + experiments_parser = subparsers.add_parser( + "experiments", + parents=[parent_parser], + description=append_doc_link(EXPERIMENTS_HELP, "experiments"), + help=EXPERIMENTS_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + + experiments_subparsers = experiments_parser.add_subparsers( + dest="cmd", + help="Use `dvc experiments CMD --help` to display " + "command-specific help.", + ) + + fix_subparsers(experiments_subparsers) + + EXPERIMENTS_SHOW_HELP = "Print experiments." 
+ experiments_show_parser = experiments_subparsers.add_parser( + "show", + parents=[parent_parser], + description=append_doc_link(EXPERIMENTS_SHOW_HELP, "experiments/show"), + help=EXPERIMENTS_SHOW_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + experiments_show_parser.add_argument( + "-a", + "--all-branches", + action="store_true", + default=False, + help="Show metrics for all branches.", + ) + experiments_show_parser.add_argument( + "-T", + "--all-tags", + action="store_true", + default=False, + help="Show metrics for all tags.", + ) + experiments_show_parser.add_argument( + "--all-commits", + action="store_true", + default=False, + help="Show metrics for all commits.", + ) + experiments_show_parser.set_defaults(func=CmdExperimentsShow) diff --git a/setup.py b/setup.py index f0646c7a2c..332b69df96 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ def run(self): "pygtrie==2.3.2", "dpath>=2.0.1,<3", "shtab>=1.1.0,<2", + "rich>=3.0.5", ] From c648e403d5005fc579760d01694d08fe8b0ad0a3 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 10 Jul 2020 18:23:38 +0900 Subject: [PATCH 03/15] experiments: clone and run repro inside clone workspace --- dvc/repo/experiments/__init__.py | 106 ++++++++++++++++++++++++++++++- 1 file changed, 103 insertions(+), 3 deletions(-) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 9a0a07db87..67d07da7fb 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -1,11 +1,111 @@ +import logging +import os +import tempfile +from contextlib import contextmanager + +from funcy import cached_property + +from dvc.exceptions import DvcException +from dvc.scm.git import Git +from dvc.utils import relpath +from dvc.utils.fs import remove + +logger = logging.getLogger(__name__) + + +class UnchangedExperimentError(DvcException): + pass + + class Experiments: + """Class that manages experiments in a DVC repo. 
+ + Args: + repo (dvc.repo.Repo): repo instance that these experiments belong to. + """ + + EXPERIMENTS_DIR = "experiments" + def __init__(self, repo): self.repo = repo - def show(self, *args, **kwargs): - from dvc.repo.experiments.show import show + @cached_property + def exp_dir(self): + return os.path.join(self.repo.dvc_dir, self.EXPERIMENTS_DIR) - return show(self.repo, *args, **kwargs) + @cached_property + def scm(self): + """Experiments clone scm instance.""" + if os.path.exists(self.exp_dir): + return Git(self.exp_dir) + return self._init_clone() + + @cached_property + def exp_dvc_dir(self): + dvc_dir = relpath(self.repo.dvc_dir, self.repo.scm.root_dir) + return os.path.join(self.exp_dir, dvc_dir) + + @cached_property + def exp_dvc(self): + """Return clone dvc Repo instance.""" + from dvc.repo import Repo + + return Repo(self.exp_dvc_dir) + + @contextmanager + def _chdir(self): + cwd = os.getcwd() + os.chdir(self.exp_dvc.root_dir) + yield + os.chdir(cwd) + + def _init_clone(self): + src_dir = self.repo.scm.root_dir + logger.debug("Initializing experiments clone") + git = Git.clone(src_dir, self.exp_dir) + self._config_clone() + return git + + def _config_clone(self): + dvc_dir = relpath(self.repo.dvc_dir, self.repo.scm.root_dir) + local_config = os.path.join(self.exp_dir, dvc_dir, "config.local") + cache_dir = self.repo.cache.local.cache_dir + logger.debug("Writing experiments local config '%s'", local_config) + with open(local_config, "w") as fobj: + fobj.write(f"[cache]\n dir = {cache_dir}") + + def _scm_checkout(self, rev): + self.scm.repo.git.reset(hard=True) + if not Git.is_sha(rev) or not self.scm.has_rev(rev): + self.scm.pull() + logger.debug("Checking out base experiment commit '%s'", rev) + self.scm.checkout(rev) + + def _patch_exp(self): + """Create a patch based on the current (parent) workspace and apply it + to the experiment workspace. 
+ """ + logger.debug("Patching experiment workspace") + tmp = tempfile.NamedTemporaryFile(delete=False).name + self.repo.scm.repo.git.diff(patch=True, output=tmp) + self.scm.repo.git.apply(tmp) + remove(tmp) + + def reproduce(self, *args, **kwargs): + rev = self.repo.scm.get_rev() + self._scm_checkout(rev) + self._patch_exp() + with self._chdir(): + self.exp_dvc.checkout() + return self.exp_dvc.reproduce(*args, **kwargs) + + def diff(self, *args, **kwargs): + pass def list(self, *args, **kwargs): pass + + def show(self, *args, **kwargs): + from dvc.repo.experiments.show import show + + return show(self.repo, *args, **kwargs) From 0291554e975e4c663c38599c17fc7db35f422e67 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Fri, 10 Jul 2020 18:26:20 +0900 Subject: [PATCH 04/15] repro: add -e/--experiment option --- .dvc/.gitignore | 1 + dvc/command/repro.py | 8 ++++++++ dvc/repo/__init__.py | 6 +++++- dvc/repo/reproduce.py | 9 +++++++++ 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/.dvc/.gitignore b/.dvc/.gitignore index 5f594f74ac..53a678bba6 100644 --- a/.dvc/.gitignore +++ b/.dvc/.gitignore @@ -9,3 +9,4 @@ /pkg /repos /tmp +/experiments diff --git a/dvc/command/repro.py b/dvc/command/repro.py index 986b9f0b48..31ff6b3938 100644 --- a/dvc/command/repro.py +++ b/dvc/command/repro.py @@ -40,6 +40,7 @@ def run(self): downstream=self.args.downstream, recursive=self.args.recursive, force_downstream=self.args.force_downstream, + experiment=self.args.experiment, ) if len(stages) == 0: @@ -166,4 +167,11 @@ def add_parser(subparsers, parent_parser): default=False, help="Start from the specified stages when reproducing pipelines.", ) + repro_parser.add_argument( + "-e", + "--experiment", + action="store_true", + default=False, + help="Save reproduction results as an experiment.", + ) repro_parser.set_defaults(func=CmdRepro) diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index b6e925612e..aed49c17a3 100644 --- a/dvc/repo/__init__.py +++ 
b/dvc/repo/__init__.py @@ -193,7 +193,11 @@ def unprotect(self, target): return self.cache.local.tree.unprotect(PathInfo(target)) def _ignore(self): - flist = [self.config.files["local"], self.tmp_dir] + flist = [ + self.config.files["local"], + self.tmp_dir, + self.experiments.exp_dir, + ] if path_isin(self.cache.local.cache_dir, self.root_dir): flist += [self.cache.local.cache_dir] diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index 1c33e23b40..6be2cbe0c7 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -69,6 +69,15 @@ def reproduce( "Neither `target` nor `--all-pipelines` are specified." ) + experiment = kwargs.pop("experiment", False) + if experiment: + return self.experiments.reproduce( + target=target, + recursive=recursive, + all_pipelines=all_pipelines, + **kwargs + ) + interactive = kwargs.get("interactive", False) if not interactive: kwargs["interactive"] = self.config["core"].get("interactive", False) From 8d14dd633d5eb2a9a7cda40be9c1e3a125209ada Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 13 Jul 2020 13:50:47 +0900 Subject: [PATCH 05/15] experiments: hash experiments to identify duplicates --- dvc/repo/experiments/__init__.py | 52 ++++++++++++++++++++++++++++---- dvc/repo/reproduce.py | 2 +- 2 files changed, 47 insertions(+), 7 deletions(-) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 67d07da7fb..9f96a2e304 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -7,7 +7,8 @@ from dvc.exceptions import DvcException from dvc.scm.git import Git -from dvc.utils import relpath +from dvc.stage.serialize import to_lockfile +from dvc.utils import dict_sha256, relpath from dvc.utils.fs import remove logger = logging.getLogger(__name__) @@ -52,6 +53,13 @@ def exp_dvc(self): return Repo(self.exp_dvc_dir) + @staticmethod + def exp_hash(stages): + exp_data = {} + for stage in stages: + exp_data.update(to_lockfile(stage)) + return dict_sha256(exp_data) 
+ @contextmanager def _chdir(self): cwd = os.getcwd() @@ -91,14 +99,46 @@ def _patch_exp(self): self.scm.repo.git.apply(tmp) remove(tmp) - def reproduce(self, *args, **kwargs): - rev = self.repo.scm.get_rev() - self._scm_checkout(rev) - self._patch_exp() + def _commit(self, stages, check_exists=True, branch=True, rev=None): + """Commit stages as an experiment and return the commit SHA.""" + hash_ = self.exp_hash(stages) + exp_name = f"{rev[:7]}-{hash_}" + if branch: + if check_exists and exp_name in self.scm.list_branches(): + logger.debug("Using existing experiment branch '%s'", exp_name) + return self.scm.resolve_rev(exp_name) + self.scm.checkout(exp_name, create_new=True) + logger.debug("Commit new experiment branch '%s'", exp_name) + self.scm.repo.git.add(A=True) + self.scm.commit(f"Add experiment {exp_name}") + + def _reproduce(self, *args, **kwargs): + """Run `dvc repro` inside the experiments workspace.""" with self._chdir(): - self.exp_dvc.checkout() return self.exp_dvc.reproduce(*args, **kwargs) + def new(self, *args, workspace=True, **kwargs): + """Create a new experiment. + + Experiment will be reproduced and checked out into the user's + workspace. 
+ """ + rev = self.repo.scm.get_rev() + self._scm_checkout(rev) + if workspace: + self._patch_exp() + else: + # configure params via command line here + pass + self.exp_dvc.checkout() + stages = self._reproduce(*args, **kwargs) + self._commit(stages, rev=rev) + self.checkout() + return stages + + def checkout(self): + pass + def diff(self, *args, **kwargs): pass diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index 6be2cbe0c7..0df3dc2ab4 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -71,7 +71,7 @@ def reproduce( experiment = kwargs.pop("experiment", False) if experiment: - return self.experiments.reproduce( + return self.experiments.new( target=target, recursive=recursive, all_pipelines=all_pipelines, From f20cc82f48dbad103937d2232b7101cf06cdfab4 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 13 Jul 2020 15:37:27 +0900 Subject: [PATCH 06/15] experiments: update show --- dvc/command/experiments.py | 97 +++++++++++++++++--------------- dvc/repo/experiments/__init__.py | 3 - dvc/repo/experiments/show.py | 71 ++++++++++++++--------- 3 files changed, 96 insertions(+), 75 deletions(-) diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index 4bbc24c2cb..60232bbd33 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -19,19 +19,20 @@ def _update_names(names, items): names.add(name) -def _collect_names(experiments): +def _collect_names(all_experiments): metric_names = set() param_names = set() - for exp in experiments.values(): - _update_names(metric_names, exp.get("metrics", {}).items()) - _update_names(param_names, exp.get("params", {}).items()) + for _, experiments in all_experiments.items(): + for exp in experiments.values(): + _update_names(metric_names, exp.get("metrics", {}).items()) + _update_names(param_names, exp.get("params", {}).items()) return sorted(metric_names), sorted(param_names) def _collect_rows( - experiments, metric_names, param_names, include_rev=False, precision=None + 
base_rev, experiments, metric_names, param_names, precision=None ): from flatten_json import flatten from dvc.command.metrics import DEFAULT_PRECISION @@ -57,71 +58,77 @@ def _extend(row, names, items): else: row.append("-") - for rev, exp in experiments.items(): + for i, (rev, exp) in enumerate(experiments.items()): row = [] - if include_rev: - row.append(rev) + style = None + if rev == "baseline": + row.append(f"{base_rev}") + style = "bold" + elif i < len(experiments) - 1: + row.append(f"├── {rev[:7]}") else: - row.append(None) + row.append(f"└── {rev[:7]}") _extend(row, metric_names, exp.get("metrics", {}).items()) _extend(row, param_names, exp.get("params", {}).items()) - yield row + yield row, style -def _show_experiments( - experiments, - all_branches=False, - all_tags=False, - all_commits=False, - precision=None, -): - from rich.console import Console +def _show_experiments(all_experiments, console, precision=None): from rich.table import Table - from dvc.utils.pager import pager + from dvc.scm.git import Git - metric_names, param_names = _collect_names(experiments) - include_rev = all_branches or all_tags or all_commits + metric_names, param_names = _collect_names(all_experiments) - table = Table(show_lines=True) - table.add_column("Commit") + table = Table(row_styles=["white", "bright_white"]) + table.add_column("Experiment", header_style="black on grey93") for name in metric_names: - table.add_column(name, justify="right") + table.add_column( + name, justify="right", header_style="black on cornsilk1" + ) for name in param_names: - table.add_column(name, justify="left") - - for row in _collect_rows( - experiments, - metric_names, - param_names, - include_rev=include_rev, - precision=precision, - ): - table.add_row(*row) - - # Note: rich does not currently include a native way to force infinite - # width for use with a pager - console = Console(file=io.StringIO(), force_terminal=True, width=9999) + table.add_column( + name, justify="left", 
header_style="black on light_cyan1" + ) + + for base_rev, experiments in all_experiments.items(): + if Git.is_sha(base_rev): + base_rev = base_rev[:7] + + for row, style, in _collect_rows( + base_rev, + experiments, + metric_names, + param_names, + precision=precision, + ): + table.add_row(*row, style=style) + console.print(table) - pager(console.file.getvalue()) class CmdExperimentsShow(CmdBase): def run(self): + from rich.console import Console + from dvc.utils.pager import pager + try: - experiments = self.repo.experiments.show( + all_experiments = self.repo.experiments.show( all_branches=self.args.all_branches, all_tags=self.args.all_tags, all_commits=self.args.all_commits, ) - _show_experiments( - experiments, - self.args.all_branches, - self.args.all_tags, - self.args.all_commits, + # Note: rich does not currently include a native way to force + # infinite width for use with a pager + console = Console( + file=io.StringIO(), force_terminal=True, width=9999 ) + + _show_experiments(all_experiments, console) + + pager(console.file.getvalue()) except DvcException: logger.exception("failed to show experiments") return 1 diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 9f96a2e304..1c9f489eab 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -142,9 +142,6 @@ def checkout(self): def diff(self, *args, **kwargs): pass - def list(self, *args, **kwargs): - pass - def show(self, *args, **kwargs): from dvc.repo.experiments.show import show diff --git a/dvc/repo/experiments/show.py b/dvc/repo/experiments/show.py index a63e5191d7..e896af50e1 100644 --- a/dvc/repo/experiments/show.py +++ b/dvc/repo/experiments/show.py @@ -1,45 +1,62 @@ import logging -from collections import defaultdict +import re +from collections import OrderedDict, defaultdict -from dvc.exceptions import DvcException from dvc.repo import locked +from dvc.repo.metrics.show import _collect_metrics, _read_metrics +from 
dvc.repo.params.show import _collect_configs, _read_params logger = logging.getLogger(__name__) -@locked -def show( - repo, all_branches=False, all_tags=False, revs=None, all_commits=False -): - from dvc.repo.metrics.show import _collect_metrics, _read_metrics - from dvc.repo.params.show import _collect_configs, _read_params +EXP_RE = re.compile(r"(?P<rev_sha>[a-f0-9]{7})-(?P<exp_sha>[a-f0-9]+)") + def _collect_experiment(repo, branch): res = defaultdict(dict) - for rev in repo.brancher( - revs=revs, - all_branches=all_branches, - all_tags=all_tags, - all_commits=all_commits, - ): + for rev in repo.brancher(revs=[branch]): configs = _collect_configs(repo) params = _read_params(repo, configs, rev) if params: - res[rev]["params"] = params + res["params"] = params metrics = _collect_metrics(repo, None, False) vals = _read_metrics(repo, metrics, rev) if vals: - res[rev]["metrics"] = vals - - if not res: - raise DvcException("no metrics or params in this repository") - - try: - active_branch = repo.scm.active_branch() - except TypeError: - pass # Detached head - else: - if res.get("workspace") == res.get(active_branch): - res.pop("workspace", None) + res["metrics"] = vals + + return res + + +@locked +def show( + repo, all_branches=False, all_tags=False, revs=None, all_commits=False +): + res = defaultdict(OrderedDict) + + if revs is None: + revs = [repo.scm.get_rev()] + + revs = set( + repo.brancher( + revs=revs, + all_branches=all_branches, + all_tags=all_tags, + all_commits=all_commits, + ) + ) + + for rev in revs: + res[rev]["baseline"] = _collect_experiment(repo, rev) + + for exp_branch in repo.experiments.scm.list_branches(): + m = re.match(EXP_RE, exp_branch) + if m: + rev = repo.scm.resolve_rev(m.group("rev_sha")) + if rev in revs: + experiment = _collect_experiment( + repo.experiments.exp_dvc, exp_branch + ) + res[rev][m.group("exp_sha")] = experiment return res From 413fe19acc4354f00a2e938dffa33d74c16700c5 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 13 Jul 2020 
17:18:43 +0900 Subject: [PATCH 07/15] experiments: checkout experiment after running repro --- dvc/repo/experiments/__init__.py | 50 +++++++++++++++++++++++++++----- dvc/repo/reproduce.py | 17 +++++++---- 2 files changed, 53 insertions(+), 14 deletions(-) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 1c9f489eab..ddc94453f7 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -6,6 +6,8 @@ from funcy import cached_property from dvc.exceptions import DvcException +from dvc.repo import locked +from dvc.repo.scm_context import scm_context from dvc.scm.git import Git from dvc.stage.serialize import to_lockfile from dvc.utils import dict_sha256, relpath @@ -93,11 +95,18 @@ def _patch_exp(self): """Create a patch based on the current (parent) workspace and apply it to the experiment workspace. """ - logger.debug("Patching experiment workspace") tmp = tempfile.NamedTemporaryFile(delete=False).name - self.repo.scm.repo.git.diff(patch=True, output=tmp) - self.scm.repo.git.apply(tmp) - remove(tmp) + try: + self.repo.scm.repo.git.diff(patch=True, output=tmp) + if os.path.getsize(tmp): + logger.debug("Patching experiment workspace") + self.scm.repo.git.apply(tmp) + else: + raise UnchangedExperimentError( + "Experiment identical to baseline commit." 
+ ) + finally: + remove(tmp) def _commit(self, stages, check_exists=True, branch=True, rev=None): """Commit stages as an experiment and return the commit SHA.""" @@ -111,6 +120,7 @@ def _commit(self, stages, check_exists=True, branch=True, rev=None): logger.debug("Commit new experiment branch '%s'", exp_name) self.scm.repo.git.add(A=True) self.scm.commit(f"Add experiment {exp_name}") + return self.scm.get_rev() def _reproduce(self, *args, **kwargs): """Run `dvc repro` inside the experiments workspace.""" @@ -132,12 +142,36 @@ def new(self, *args, workspace=True, **kwargs): pass self.exp_dvc.checkout() stages = self._reproduce(*args, **kwargs) - self._commit(stages, rev=rev) - self.checkout() + exp_rev = self._commit(stages, rev=rev) + self._checkout(exp_rev, force=True) return stages - def checkout(self): - pass + def _checkout(self, rev, force=False): + """Checkout an experiment to the user's workspace.""" + from git.exc import RepositoryDirtyError + + if force: + self.repo.scm.repo.git.reset(hard=True) + self._scm_checkout(rev) + + logger.debug("Patching local workspace") + tmp = tempfile.NamedTemporaryFile(delete=False).name + try: + self.scm.repo.head.commit.diff("HEAD~1", patch=True, output=tmp) + if os.path.getsize(tmp): + self.repo.scm.repo.git.apply(tmp, reverse=True, reject=True) + except RepositoryDirtyError: + raise DvcException( + "Could not checkout experiment, workspace contains " + "uncommitted changes." + ) + finally: + remove(tmp) + + @locked + @scm_context + def checkout(self, *args, **kwargs): + return self._checkout(*args, **kwargs) def diff(self, *args, **kwargs): pass diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index 0df3dc2ab4..c0da3d22b1 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -1,6 +1,7 @@ import logging from dvc.exceptions import InvalidArgumentError, ReproductionError +from dvc.repo.experiments import UnchangedExperimentError from dvc.repo.scm_context import scm_context from . 
import locked @@ -71,12 +72,16 @@ def reproduce( experiment = kwargs.pop("experiment", False) if experiment: - return self.experiments.new( - target=target, - recursive=recursive, - all_pipelines=all_pipelines, - **kwargs - ) + try: + return self.experiments.new( + target=target, + recursive=recursive, + all_pipelines=all_pipelines, + **kwargs + ) + except UnchangedExperimentError as exc: + # If experiment contains no changes, just run regular repro + logger.debug(exc) interactive = kwargs.get("interactive", False) if not interactive: From d28c28ac25e5aef6d04ae2843492d129e7b1d297 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Mon, 13 Jul 2020 18:09:37 +0900 Subject: [PATCH 08/15] experiments: add `dvc experiments checkout` --- dvc/command/experiments.py | 32 ++++++++++++++++++++++++++++++ dvc/repo/experiments/__init__.py | 34 +++++++++++++++++--------------- dvc/repo/experiments/checkout.py | 17 ++++++++++++++++ dvc/repo/experiments/show.py | 3 ++- 4 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 dvc/repo/experiments/checkout.py diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index 60232bbd33..63fd443274 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -136,6 +136,15 @@ def run(self): return 0 +class CmdExperimentsCheckout(CmdBase): + def run(self): + self.repo.experiments.checkout( + self.args.experiment, force=self.args.force + ) + + return 0 + + def add_parser(subparsers, parent_parser): EXPERIMENTS_HELP = "Commands to display and compare experiments." @@ -184,3 +193,26 @@ def add_parser(subparsers, parent_parser): help="Show metrics for all commits.", ) experiments_show_parser.set_defaults(func=CmdExperimentsShow) + + EXPERIMENTS_CHECKOUT_HELP = "Checkout experiments." 
+ experiments_checkout_parser = experiments_subparsers.add_parser( + "checkout", + parents=[parent_parser], + description=append_doc_link( + EXPERIMENTS_CHECKOUT_HELP, "experiments/checkout" + ), + help=EXPERIMENTS_CHECKOUT_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + experiments_checkout_parser.add_argument( + "-f", + "--force", + action="store_true", + default=False, + help="Overwrite your current workspace with changes from the " + "experiment.", + ) + experiments_checkout_parser.add_argument( + "experiment", help="Checkout this experiment.", + ) + experiments_checkout_parser.set_defaults(func=CmdExperimentsCheckout) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index ddc94453f7..9892c7577e 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -6,8 +6,6 @@ from funcy import cached_property from dvc.exceptions import DvcException -from dvc.repo import locked -from dvc.repo.scm_context import scm_context from dvc.scm.git import Git from dvc.stage.serialize import to_lockfile from dvc.utils import dict_sha256, relpath @@ -86,8 +84,8 @@ def _config_clone(self): def _scm_checkout(self, rev): self.scm.repo.git.reset(hard=True) - if not Git.is_sha(rev) or not self.scm.has_rev(rev): - self.scm.pull() + # if not Git.is_sha(rev) or not self.scm.has_rev(rev): + # self.scm.pull() logger.debug("Checking out base experiment commit '%s'", rev) self.scm.checkout(rev) @@ -143,35 +141,39 @@ def new(self, *args, workspace=True, **kwargs): self.exp_dvc.checkout() stages = self._reproduce(*args, **kwargs) exp_rev = self._commit(stages, rev=rev) - self._checkout(exp_rev, force=True) + self.checkout_exp(exp_rev, force=True) return stages - def _checkout(self, rev, force=False): + def checkout_exp(self, rev, force=False): """Checkout an experiment to the user's workspace.""" - from git.exc import RepositoryDirtyError + from git.exc import GitCommandError + from dvc.repo.checkout import _checkout 
as dvc_checkout if force: self.repo.scm.repo.git.reset(hard=True) + logger.debug(f"checkout {rev}") self._scm_checkout(rev) - logger.debug("Patching local workspace") tmp = tempfile.NamedTemporaryFile(delete=False).name + self.scm.repo.head.commit.diff("HEAD~1", patch=True, output=tmp) try: - self.scm.repo.head.commit.diff("HEAD~1", patch=True, output=tmp) if os.path.getsize(tmp): - self.repo.scm.repo.git.apply(tmp, reverse=True, reject=True) - except RepositoryDirtyError: + logger.debug("Patching local workspace") + self.repo.scm.repo.git.apply(tmp, reverse=True) + dvc_checkout(self.repo) + except GitCommandError: raise DvcException( - "Could not checkout experiment, workspace contains " - "uncommitted changes." + "Checkout failed, experiment contains changes which " + "conflict with your current workspace. To overwrite " + "your workspace, use `dvc experiments checkout --force`." ) finally: remove(tmp) - @locked - @scm_context def checkout(self, *args, **kwargs): - return self._checkout(*args, **kwargs) + from dvc.repo.experiments.checkout import checkout + + return checkout(self.repo, *args, **kwargs) def diff(self, *args, **kwargs): pass diff --git a/dvc/repo/experiments/checkout.py b/dvc/repo/experiments/checkout.py new file mode 100644 index 0000000000..a1e2e1f1a4 --- /dev/null +++ b/dvc/repo/experiments/checkout.py @@ -0,0 +1,17 @@ +import logging + +from dvc.repo import locked +from dvc.repo.scm_context import scm_context + +logger = logging.getLogger(__name__) + + +@locked +@scm_context +def checkout(repo, rev, *args, **kwargs): + repo.experiments.checkout_exp(rev, *args, **kwargs) + logger.info( + "Changes for experiment '%s' have been applied to your current " + "workspace.", + rev, + ) diff --git a/dvc/repo/experiments/show.py b/dvc/repo/experiments/show.py index e896af50e1..5da293b914 100644 --- a/dvc/repo/experiments/show.py +++ b/dvc/repo/experiments/show.py @@ -54,9 +54,10 @@ def show( if m: rev = repo.scm.resolve_rev(m.group("rev_sha")) if rev in 
revs: + exp_rev = repo.experiments.scm.resolve_rev(exp_branch) experiment = _collect_experiment( repo.experiments.exp_dvc, exp_branch ) - res[rev][m.group("exp_sha")] = experiment + res[rev][exp_rev] = experiment return res From baa57d9dcadfd05e41138347ebc7f962666bc70c Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 15:44:42 +0900 Subject: [PATCH 09/15] experiments: add simple `dvc experiments diff` command * just shows combined output from `metrics diff` and `plots diff` for now --- dvc/command/experiments.py | 134 ++++++++++++++++++++++++++++++- dvc/repo/experiments/__init__.py | 8 +- dvc/repo/experiments/diff.py | 36 +++++++++ dvc/repo/experiments/show.py | 7 +- 4 files changed, 178 insertions(+), 7 deletions(-) create mode 100644 dvc/repo/experiments/diff.py diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index 63fd443274..0cfef7e1bc 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -1,8 +1,10 @@ import argparse import io import logging +from collections import OrderedDict from dvc.command.base import CmdBase, append_doc_link, fix_subparsers +from dvc.command.metrics import DEFAULT_PRECISION from dvc.exceptions import DvcException logger = logging.getLogger(__name__) @@ -35,7 +37,6 @@ def _collect_rows( base_rev, experiments, metric_names, param_names, precision=None ): from flatten_json import flatten - from dvc.command.metrics import DEFAULT_PRECISION if precision is None: precision = DEFAULT_PRECISION @@ -145,6 +146,78 @@ def run(self): return 0 +def _show_diff( + diff, title="", markdown=False, no_path=False, old=False, precision=None +): + from dvc.utils.diff import table + + if precision is None: + precision = DEFAULT_PRECISION + + def _round(val): + if isinstance(val, float): + return round(val, precision) + + return val + + rows = [] + for fname, diff_ in diff.items(): + sorted_diff = OrderedDict(sorted(diff_.items())) + for item, change in sorted_diff.items(): + row = [] if no_path 
else [fname] + row.append(item) + if old: + row.append(_round(change.get("old"))) + row.append(_round(change["new"])) + row.append(_round(change.get("diff", "diff not supported"))) + rows.append(row) + + header = [] if no_path else ["Path"] + header.append(title) + if old: + header.extend(["Old", "New"]) + else: + header.append("Value") + header.append("Change") + + return table(header, rows, markdown) + + +class CmdExperimentsDiff(CmdBase): + def run(self): + try: + diff = self.repo.experiments.diff( + a_rev=self.args.a_rev, + b_rev=self.args.b_rev, + all=self.args.all, + ) + + if self.args.show_json: + import json + + logger.info(json.dumps(diff)) + else: + diffs = [("metrics", "Metric"), ("params", "Param")] + for key, title in diffs: + table = _show_diff( + diff[key], + title=title, + markdown=self.args.show_md, + no_path=self.args.no_path, + old=self.args.old, + precision=self.args.precision, + ) + if table: + logger.info(table) + logger.info("") + + except DvcException: + logger.exception("failed to show experiments diff") + return 1 + + return 0 + + def add_parser(subparsers, parent_parser): EXPERIMENTS_HELP = "Commands to display and compare experiments." @@ -216,3 +289,62 @@ def add_parser(subparsers, parent_parser): "experiment", help="Checkout this experiment.", ) experiments_checkout_parser.set_defaults(func=CmdExperimentsCheckout) + + EXPERIMENTS_DIFF_HELP = ( + "Show changes between experiments in the DVC repository." 
+ ) + experiments_diff_parser = experiments_subparsers.add_parser( + "diff", + parents=[parent_parser], + description=append_doc_link(EXPERIMENTS_DIFF_HELP, "experiments/diff"), + help=EXPERIMENTS_DIFF_HELP, + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + experiments_diff_parser.add_argument( + "a_rev", nargs="?", help="Old experiment to compare (defaults to HEAD)" + ) + experiments_diff_parser.add_argument( + "b_rev", + nargs="?", + help="New experiment to compare (defaults to the current workspace)", + ) + experiments_diff_parser.add_argument( + "--all", + action="store_true", + default=False, + help="Show unchanged metrics/params as well.", + ) + experiments_diff_parser.add_argument( + "--show-json", + action="store_true", + default=False, + help="Show output in JSON format.", + ) + experiments_diff_parser.add_argument( + "--show-md", + action="store_true", + default=False, + help="Show tabulated output in the Markdown format (GFM).", + ) + experiments_diff_parser.add_argument( + "--old", + action="store_true", + default=False, + help="Show old metric/param value.", + ) + experiments_diff_parser.add_argument( + "--no-path", + action="store_true", + default=False, + help="Don't show metric/param path.", + ) + experiments_diff_parser.add_argument( + "--precision", + type=int, + help=( + "Round metrics/params to `n` digits precision after the decimal " + f"point. Rounds to {DEFAULT_PRECISION} digits by default." 
+ ), + metavar="<n>", + ) + experiments_diff_parser.set_defaults(func=CmdExperimentsDiff) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 9892c7577e..b5d274ada9 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -61,7 +61,7 @@ def exp_hash(stages): return dict_sha256(exp_data) @contextmanager - def _chdir(self): + def chdir(self): cwd = os.getcwd() os.chdir(self.exp_dvc.root_dir) yield @@ -122,7 +122,7 @@ def _commit(self, stages, check_exists=True, branch=True, rev=None): def _reproduce(self, *args, **kwargs): """Run `dvc repro` inside the experiments workspace.""" - with self._chdir(): + with self.chdir(): return self.exp_dvc.reproduce(*args, **kwargs) def new(self, *args, workspace=True, **kwargs): @@ -176,7 +176,9 @@ def checkout(self, *args, **kwargs): return checkout(self.repo, *args, **kwargs) def diff(self, *args, **kwargs): - pass + from dvc.repo.experiments.diff import diff + + return diff(self.repo, *args, **kwargs) def show(self, *args, **kwargs): from dvc.repo.experiments.show import show diff --git a/dvc/repo/experiments/diff.py b/dvc/repo/experiments/diff.py new file mode 100644 index 0000000000..8dd5538977 --- /dev/null +++ b/dvc/repo/experiments/diff.py @@ -0,0 +1,36 @@ +import logging + +from dvc.utils.diff import diff as _diff +from dvc.utils.diff import format_dict + +logger = logging.getLogger(__name__) + + +def diff(repo, *args, a_rev=None, b_rev=None, **kwargs): + from dvc.repo.experiments.show import _collect_experiment + + if repo.scm.no_commits: + return {} + + if a_rev: + with repo.experiments.chdir(): + old = _collect_experiment(repo.experiments.exp_dvc, a_rev) + else: + old = _collect_experiment(repo, "HEAD") + + if b_rev: + with repo.experiments.chdir(): + new = _collect_experiment(repo.experiments.exp_dvc, b_rev) + else: + new = _collect_experiment(repo, "workspace") + + with_unchanged = kwargs.pop("all", False) + + return { + key: _diff( + format_dict(old[key]), 
+ format_dict(new[key]), + with_unchanged=with_unchanged, + ) + for key in ["metrics", "params"] + } diff --git a/dvc/repo/experiments/show.py b/dvc/repo/experiments/show.py index 5da293b914..bbb2c706ff 100644 --- a/dvc/repo/experiments/show.py +++ b/dvc/repo/experiments/show.py @@ -55,9 +55,10 @@ def show( rev = repo.scm.resolve_rev(m.group("rev_sha")) if rev in revs: exp_rev = repo.experiments.scm.resolve_rev(exp_branch) - experiment = _collect_experiment( - repo.experiments.exp_dvc, exp_branch - ) + with repo.experiments.chdir(): + experiment = _collect_experiment( + repo.experiments.exp_dvc, exp_branch + ) res[rev][exp_rev] = experiment return res From ea73659d084219b0891aaac6da5536f2e15aeb61 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 17:33:43 +0900 Subject: [PATCH 10/15] add very simple tests --- tests/func/experiments/__init__.py | 0 tests/func/experiments/test_experiments.py | 24 ++++++++++++++++++++++ tests/func/experiments/test_show.py | 19 +++++++++++++++++ 3 files changed, 43 insertions(+) create mode 100644 tests/func/experiments/__init__.py create mode 100644 tests/func/experiments/test_experiments.py create mode 100644 tests/func/experiments/test_show.py diff --git a/tests/func/experiments/__init__.py b/tests/func/experiments/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/func/experiments/test_experiments.py b/tests/func/experiments/test_experiments.py new file mode 100644 index 0000000000..33d41c6d21 --- /dev/null +++ b/tests/func/experiments/test_experiments.py @@ -0,0 +1,24 @@ +from tests.func.test_repro_multistage import COPY_SCRIPT + + +def test_new_simple(tmp_dir, scm, dvc, mocker): + tmp_dir.gen("copy.py", COPY_SCRIPT) + tmp_dir.gen("params.yaml", "foo: 1") + stage = dvc.run( + cmd="python copy.py params.yaml metrics.yaml", + metrics_no_cache=["metrics.yaml"], + params=["foo"], + name="copy-file", + ) + scm.add(["dvc.yaml", "dvc.lock", "copy.py", "params.yaml", "metrics.yaml"]) + 
scm.commit("init") + + tmp_dir.gen("params.yaml", "foo: 2") + + new_mock = mocker.spy(dvc.experiments, "new") + dvc.reproduce(stage.addressing, experiment=True) + + new_mock.assert_called_once() + assert ( + tmp_dir / ".dvc" / "experiments" / "metrics.yaml" + ).read_text() == "foo: 2" diff --git a/tests/func/experiments/test_show.py b/tests/func/experiments/test_show.py new file mode 100644 index 0000000000..bfcb595e49 --- /dev/null +++ b/tests/func/experiments/test_show.py @@ -0,0 +1,19 @@ +from tests.func.test_repro_multistage import COPY_SCRIPT + + +def test_show_simple(tmp_dir, scm, dvc): + tmp_dir.gen("copy.py", COPY_SCRIPT) + tmp_dir.gen("params.yaml", "foo: 1") + dvc.run( + cmd="python copy.py params.yaml metrics.yaml", + metrics_no_cache=["metrics.yaml"], + params=["foo"], + single_stage=True, + ) + + assert dvc.experiments.show()["workspace"] == { + "baseline": { + "metrics": {"metrics.yaml": {"foo": 1}}, + "params": {"params.yaml": {"foo": 1}}, + } + } From 6d6da97315cc5923b9ab3daef34c958479f17995 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 17:43:55 +0900 Subject: [PATCH 11/15] add some useful output messages --- dvc/repo/experiments/__init__.py | 7 ++++++- dvc/repo/reproduce.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index b5d274ada9..adf4118468 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -134,7 +134,11 @@ def new(self, *args, workspace=True, **kwargs): rev = self.repo.scm.get_rev() self._scm_checkout(rev) if workspace: - self._patch_exp() + try: + self._patch_exp() + except UnchangedExperimentError as exc: + logger.info("Reproducing existing experiment '%s'.", rev[:7]) + raise exc else: # configure params via command line here pass @@ -142,6 +146,7 @@ def new(self, *args, workspace=True, **kwargs): stages = self._reproduce(*args, **kwargs) exp_rev = self._commit(stages, rev=rev) 
self.checkout_exp(exp_rev, force=True) + logger.info("Generated experiment '%s'.", exp_rev[:7]) return stages def checkout_exp(self, rev, force=False): diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index c0da3d22b1..a6f84774f0 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -79,9 +79,9 @@ def reproduce( all_pipelines=all_pipelines, **kwargs ) - except UnchangedExperimentError as exc: + except UnchangedExperimentError: # If experiment contains no changes, just run regular repro - logger.debug(exc) + pass interactive = kwargs.get("interactive", False) if not interactive: From 1997bd96e805a317fc3470caf65ba3dd154ce13b Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 18:15:35 +0900 Subject: [PATCH 12/15] experiments: suppress help messages while feature is in development * experiment related commands will be accessible but hidden from the default command help messages --- dvc/command/experiments.py | 1 - dvc/command/repro.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index 0cfef7e1bc..1a5024abb1 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -225,7 +225,6 @@ def add_parser(subparsers, parent_parser): "experiments", parents=[parent_parser], description=append_doc_link(EXPERIMENTS_HELP, "experiments"), - help=EXPERIMENTS_HELP, formatter_class=argparse.RawDescriptionHelpFormatter, ) diff --git a/dvc/command/repro.py b/dvc/command/repro.py index 31ff6b3938..2b06ec03e5 100644 --- a/dvc/command/repro.py +++ b/dvc/command/repro.py @@ -172,6 +172,6 @@ def add_parser(subparsers, parent_parser): "--experiment", action="store_true", default=False, - help="Save reproduction results as an experiment.", + help=argparse.SUPPRESS, ) repro_parser.set_defaults(func=CmdRepro) From 4aa69426f42b8651bd09e8250c5f18a280ab8d72 Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 18:56:27 +0900 Subject: [PATCH 13/15] add 
simple experiments command tests --- tests/unit/command/test_experiments.py | 51 ++++++++++++++++++++++++++ tests/unit/command/test_repro.py | 1 + 2 files changed, 52 insertions(+) create mode 100644 tests/unit/command/test_experiments.py diff --git a/tests/unit/command/test_experiments.py b/tests/unit/command/test_experiments.py new file mode 100644 index 0000000000..8b1048147d --- /dev/null +++ b/tests/unit/command/test_experiments.py @@ -0,0 +1,51 @@ +from dvc.cli import parse_args +from dvc.command.experiments import CmdExperimentsDiff, CmdExperimentsShow + + +def test_experiments_diff(dvc, mocker): + cli_args = parse_args( + [ + "experiments", + "diff", + "HEAD~10", + "HEAD~1", + "--all", + "--show-json", + "--show-md", + "--old", + "--precision", + "10", + ] + ) + assert cli_args.func == CmdExperimentsDiff + + cmd = cli_args.func(cli_args) + m = mocker.patch("dvc.repo.experiments.diff.diff", return_value={}) + + assert cmd.run() == 0 + + m.assert_called_once_with( + cmd.repo, a_rev="HEAD~10", b_rev="HEAD~1", all=True + ) + + +def test_experiments_show(dvc, mocker): + cli_args = parse_args( + [ + "experiments", + "show", + "--all-tags", + "--all-branches", + "--all-commits", + ] + ) + assert cli_args.func == CmdExperimentsShow + + cmd = cli_args.func(cli_args) + m = mocker.patch("dvc.repo.experiments.show.show", return_value={}) + + assert cmd.run() == 0 + + m.assert_called_once_with( + cmd.repo, all_tags=True, all_branches=True, all_commits=True + ) diff --git a/tests/unit/command/test_repro.py b/tests/unit/command/test_repro.py index d29c027661..fbcf1271fc 100644 --- a/tests/unit/command/test_repro.py +++ b/tests/unit/command/test_repro.py @@ -14,6 +14,7 @@ "single_item": False, "recursive": False, "force_downstream": False, + "experiment": False, } From a7b6d65ac333dacb841b38eb93f57f5caadd3b0a Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 18:58:26 +0900 Subject: [PATCH 14/15] experiments: disable feature by default * experiments only 
enabled in test environment (DVC_TEST) or when core.experiments config option is true --- dvc/command/experiments.py | 9 +++++++++ dvc/config.py | 1 + dvc/repo/__init__.py | 8 ++++++-- dvc/repo/experiments/__init__.py | 8 +++++++- dvc/repo/reproduce.py | 2 +- 5 files changed, 24 insertions(+), 4 deletions(-) diff --git a/dvc/command/experiments.py b/dvc/command/experiments.py index 1a5024abb1..3ec817bc70 100644 --- a/dvc/command/experiments.py +++ b/dvc/command/experiments.py @@ -114,6 +114,9 @@ def run(self): from rich.console import Console from dvc.utils.pager import pager + if not self.repo.experiments: + return 0 + try: all_experiments = self.repo.experiments.show( all_branches=self.args.all_branches, @@ -139,6 +142,9 @@ def run(self): class CmdExperimentsCheckout(CmdBase): def run(self): + if not self.repo.experiments: + return 0 + self.repo.experiments.checkout( self.args.experiment, force=self.args.force ) @@ -185,6 +191,9 @@ def _round(val): class CmdExperimentsDiff(CmdBase): def run(self): + if not self.repo.experiments: + return 0 + try: diff = self.repo.experiments.diff( a_rev=self.args.a_rev, diff --git a/dvc/config.py b/dvc/config.py index 3b083597a7..e1c9d93284 100644 --- a/dvc/config.py +++ b/dvc/config.py @@ -127,6 +127,7 @@ class RelPath(str): Optional("analytics", default=True): Bool, Optional("hardlink_lock", default=False): Bool, Optional("no_scm", default=False): Bool, + Optional("experiments", default=False): Bool, }, "cache": { "local": str, diff --git a/dvc/repo/__init__.py b/dvc/repo/__init__.py index aed49c17a3..46b57a6054 100644 --- a/dvc/repo/__init__.py +++ b/dvc/repo/__init__.py @@ -127,7 +127,10 @@ def __init__(self, root_dir=None, scm=None, rev=None): self.plots = Plots(self) self.params = Params(self) - self.experiments = Experiments(self) + try: + self.experiments = Experiments(self) + except NotImplementedError: + self.experiments = None self._ignore() @@ -196,8 +199,9 @@ def _ignore(self): flist = [ self.config.files["local"], 
self.tmp_dir, - self.experiments.exp_dir, ] + if self.experiments: + flist.append(self.experiments.exp_dir) if path_isin(self.cache.local.cache_dir, self.root_dir): flist += [self.cache.local.cache_dir] diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index adf4118468..5fe03197b6 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -8,7 +8,7 @@ from dvc.exceptions import DvcException from dvc.scm.git import Git from dvc.stage.serialize import to_lockfile -from dvc.utils import dict_sha256, relpath +from dvc.utils import dict_sha256, env2bool, relpath from dvc.utils.fs import remove logger = logging.getLogger(__name__) @@ -28,6 +28,12 @@ class Experiments: EXPERIMENTS_DIR = "experiments" def __init__(self, repo): + if not ( + env2bool("DVC_TEST") + or repo.config["core"].get("experiments", False) + ): + raise NotImplementedError + self.repo = repo @cached_property diff --git a/dvc/repo/reproduce.py b/dvc/repo/reproduce.py index a6f84774f0..e127c4ea42 100644 --- a/dvc/repo/reproduce.py +++ b/dvc/repo/reproduce.py @@ -71,7 +71,7 @@ def reproduce( ) experiment = kwargs.pop("experiment", False) - if experiment: + if experiment and self.experiments: try: return self.experiments.new( target=target, From 6d5f3459045edbeda88074b077eeef8f0fecd66d Mon Sep 17 00:00:00 2001 From: Peter Rowlands Date: Thu, 16 Jul 2020 19:49:53 +0900 Subject: [PATCH 15/15] use fetch on checkout missing revs --- dvc/repo/experiments/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 5fe03197b6..dcbe550354 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -90,8 +90,8 @@ def _config_clone(self): def _scm_checkout(self, rev): self.scm.repo.git.reset(hard=True) - # if not Git.is_sha(rev) or not self.scm.has_rev(rev): - # self.scm.pull() + if not Git.is_sha(rev) or not self.scm.has_rev(rev): + 
self.scm.fetch(all=True) logger.debug("Checking out base experiment commit '%s'", rev) self.scm.checkout(rev)