Skip to content

Commit

Permalink
Experiments proof of concept (#4199)
Browse files Browse the repository at this point in the history
* experiments: add initial experiments.show

* only includes metrics + params (no code/data information)

* experiments: basic `dvc experiments show` functionality

* experiments: clone and run repro inside clone workspace

* repro: add -e/--experiment option

* experiments: hash experiments to identify duplicates

* experiments: update show

* experiments: checkout experiment after running repro

* experiments: add `dvc experiments checkout`

* experiments: add simple `dvc experiments diff` command

* just shows combined output from `metrics diff` and `plots diff` for
  now

* add very simple tests

* add some useful output messages

* experiments: suppress help messages while feature is in development

* experiment related commands will be accessible but hidden from the
  default command help messages

* add simple experiments command tests

* experiments: disable feature by default

* experiments only enabled in test environment (DVC_TEST) or when
  core.experiments config option is true

* use fetch on checkout missing revs
  • Loading branch information
pmrowla authored Jul 17, 2020
1 parent 4edf368 commit a56c8bb
Show file tree
Hide file tree
Showing 17 changed files with 806 additions and 1 deletion.
1 change: 1 addition & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
/pkg
/repos
/tmp
/experiments
2 changes: 2 additions & 0 deletions dvc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
data_sync,
destroy,
diff,
experiments,
freeze,
gc,
get,
Expand Down Expand Up @@ -77,6 +78,7 @@
update,
git_hook,
plots,
experiments,
]


Expand Down
358 changes: 358 additions & 0 deletions dvc/command/experiments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,358 @@
import argparse
import io
import logging
from collections import OrderedDict

from dvc.command.base import CmdBase, append_doc_link, fix_subparsers
from dvc.command.metrics import DEFAULT_PRECISION
from dvc.exceptions import DvcException

logger = logging.getLogger(__name__)


def _update_names(names, items):
from flatten_json import flatten

for name, item in items:
if isinstance(item, dict):
item = flatten(item, ".")
names.update(item.keys())
else:
names.add(name)


def _collect_names(all_experiments):
metric_names = set()
param_names = set()

for _, experiments in all_experiments.items():
for exp in experiments.values():
_update_names(metric_names, exp.get("metrics", {}).items())
_update_names(param_names, exp.get("params", {}).items())

return sorted(metric_names), sorted(param_names)


def _collect_rows(
base_rev, experiments, metric_names, param_names, precision=None
):
from flatten_json import flatten

if precision is None:
precision = DEFAULT_PRECISION

def _round(val):
if isinstance(val, float):
return round(val, precision)

return val

def _extend(row, names, items):
for fname, item in items:
if isinstance(item, dict):
item = flatten(item, ".")
else:
item = {fname: item}
for name in names:
if name in item:
row.append(str(_round(item[name])))
else:
row.append("-")

for i, (rev, exp) in enumerate(experiments.items()):
row = []
style = None
if rev == "baseline":
row.append(f"{base_rev}")
style = "bold"
elif i < len(experiments) - 1:
row.append(f"├── {rev[:7]}")
else:
row.append(f"└── {rev[:7]}")

_extend(row, metric_names, exp.get("metrics", {}).items())
_extend(row, param_names, exp.get("params", {}).items())

yield row, style


def _show_experiments(all_experiments, console, precision=None):
from rich.table import Table
from dvc.scm.git import Git

metric_names, param_names = _collect_names(all_experiments)

table = Table(row_styles=["white", "bright_white"])
table.add_column("Experiment", header_style="black on grey93")
for name in metric_names:
table.add_column(
name, justify="right", header_style="black on cornsilk1"
)
for name in param_names:
table.add_column(
name, justify="left", header_style="black on light_cyan1"
)

for base_rev, experiments in all_experiments.items():
if Git.is_sha(base_rev):
base_rev = base_rev[:7]

for row, style, in _collect_rows(
base_rev,
experiments,
metric_names,
param_names,
precision=precision,
):
table.add_row(*row, style=style)

console.print(table)


class CmdExperimentsShow(CmdBase):
def run(self):
from rich.console import Console
from dvc.utils.pager import pager

if not self.repo.experiments:
return 0

try:
all_experiments = self.repo.experiments.show(
all_branches=self.args.all_branches,
all_tags=self.args.all_tags,
all_commits=self.args.all_commits,
)

# Note: rich does not currently include a native way to force
# infinite width for use with a pager
console = Console(
file=io.StringIO(), force_terminal=True, width=9999
)

_show_experiments(all_experiments, console)

pager(console.file.getvalue())
except DvcException:
logger.exception("failed to show experiments")
return 1

return 0


class CmdExperimentsCheckout(CmdBase):
def run(self):
if not self.repo.experiments:
return 0

self.repo.experiments.checkout(
self.args.experiment, force=self.args.force
)

return 0


def _show_diff(
diff, title="", markdown=False, no_path=False, old=False, precision=None
):
from dvc.utils.diff import table

if precision is None:
precision = DEFAULT_PRECISION

def _round(val):
if isinstance(val, float):
return round(val, precision)

return val

rows = []
for fname, diff_ in diff.items():
sorted_diff = OrderedDict(sorted(diff_.items()))
for item, change in sorted_diff.items():
row = [] if no_path else [fname]
row.append(item)
if old:
row.append(_round(change.get("old")))
row.append(_round(change["new"]))
row.append(_round(change.get("diff", "diff not supported")))
rows.append(row)

header = [] if no_path else ["Path"]
header.append(title)
if old:
header.extend(["Old", "New"])
else:
header.append("Value")
header.append("Change")

return table(header, rows, markdown)


class CmdExperimentsDiff(CmdBase):
def run(self):
if not self.repo.experiments:
return 0

try:
diff = self.repo.experiments.diff(
a_rev=self.args.a_rev,
b_rev=self.args.b_rev,
all=self.args.all,
)

if self.args.show_json:
import json

logger.info(json.dumps(diff))
else:
diffs = [("metrics", "Metric"), ("params", "Param")]
for key, title in diffs:
table = _show_diff(
diff[key],
title=title,
markdown=self.args.show_md,
no_path=self.args.no_path,
old=self.args.old,
precision=self.args.precision,
)
if table:
logger.info(table)
logger.info("")

except DvcException:
logger.exception("failed to show experiments diff")
return 1

return 0


def add_parser(subparsers, parent_parser):
EXPERIMENTS_HELP = "Commands to display and compare experiments."

experiments_parser = subparsers.add_parser(
"experiments",
parents=[parent_parser],
description=append_doc_link(EXPERIMENTS_HELP, "experiments"),
formatter_class=argparse.RawDescriptionHelpFormatter,
)

experiments_subparsers = experiments_parser.add_subparsers(
dest="cmd",
help="Use `dvc experiments CMD --help` to display "
"command-specific help.",
)

fix_subparsers(experiments_subparsers)

EXPERIMENTS_SHOW_HELP = "Print experiments."
experiments_show_parser = experiments_subparsers.add_parser(
"show",
parents=[parent_parser],
description=append_doc_link(EXPERIMENTS_SHOW_HELP, "experiments/show"),
help=EXPERIMENTS_SHOW_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
experiments_show_parser.add_argument(
"-a",
"--all-branches",
action="store_true",
default=False,
help="Show metrics for all branches.",
)
experiments_show_parser.add_argument(
"-T",
"--all-tags",
action="store_true",
default=False,
help="Show metrics for all tags.",
)
experiments_show_parser.add_argument(
"--all-commits",
action="store_true",
default=False,
help="Show metrics for all commits.",
)
experiments_show_parser.set_defaults(func=CmdExperimentsShow)

EXPERIMENTS_CHECKOUT_HELP = "Checkout experiments."
experiments_checkout_parser = experiments_subparsers.add_parser(
"checkout",
parents=[parent_parser],
description=append_doc_link(
EXPERIMENTS_CHECKOUT_HELP, "experiments/checkout"
),
help=EXPERIMENTS_CHECKOUT_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
experiments_checkout_parser.add_argument(
"-f",
"--force",
action="store_true",
default=False,
help="Overwrite your current workspace with changes from the "
"experiment.",
)
experiments_checkout_parser.add_argument(
"experiment", help="Checkout this experiment.",
)
experiments_checkout_parser.set_defaults(func=CmdExperimentsCheckout)

EXPERIMENTS_DIFF_HELP = (
"Show changes between experiments in the DVC repository."
)
experiments_diff_parser = experiments_subparsers.add_parser(
"diff",
parents=[parent_parser],
description=append_doc_link(EXPERIMENTS_DIFF_HELP, "experiments/diff"),
help=EXPERIMENTS_DIFF_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
experiments_diff_parser.add_argument(
"a_rev", nargs="?", help="Old experiment to compare (defaults to HEAD)"
)
experiments_diff_parser.add_argument(
"b_rev",
nargs="?",
help="New experiment to compare (defaults to the current workspace)",
)
experiments_diff_parser.add_argument(
"--all",
action="store_true",
default=False,
help="Show unchanged metrics/params as well.",
)
experiments_diff_parser.add_argument(
"--show-json",
action="store_true",
default=False,
help="Show output in JSON format.",
)
experiments_diff_parser.add_argument(
"--show-md",
action="store_true",
default=False,
help="Show tabulated output in the Markdown format (GFM).",
)
experiments_diff_parser.add_argument(
"--old",
action="store_true",
default=False,
help="Show old metric/param value.",
)
experiments_diff_parser.add_argument(
"--no-path",
action="store_true",
default=False,
help="Don't show metric/param path.",
)
experiments_diff_parser.add_argument(
"--precision",
type=int,
help=(
"Round metrics/params to `n` digits precision after the decimal "
f"point. Rounds to {DEFAULT_PRECISION} digits by default."
),
metavar="<n>",
)
experiments_diff_parser.set_defaults(func=CmdExperimentsDiff)
8 changes: 8 additions & 0 deletions dvc/command/repro.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def run(self):
downstream=self.args.downstream,
recursive=self.args.recursive,
force_downstream=self.args.force_downstream,
experiment=self.args.experiment,
)

if len(stages) == 0:
Expand Down Expand Up @@ -166,4 +167,11 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Start from the specified stages when reproducing pipelines.",
)
repro_parser.add_argument(
"-e",
"--experiment",
action="store_true",
default=False,
help=argparse.SUPPRESS,
)
repro_parser.set_defaults(func=CmdRepro)
Loading

0 comments on commit a56c8bb

Please sign in to comment.