Skip to content

Commit

Permalink
metrics: introduce diff
Browse files Browse the repository at this point in the history
This first implementation is based on existing `dvc metrics show`
functionality, but is also able to auto-detect json to properly
show diff for it.
  • Loading branch information
efiop committed Jan 13, 2020
1 parent 537a0af commit e393e29
Show file tree
Hide file tree
Showing 8 changed files with 390 additions and 19 deletions.
122 changes: 121 additions & 1 deletion dvc/command/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ def show_metrics(metrics, all_branches=False, all_tags=False):
logger.info("{branch}:".format(branch=branch))

for fname, metric in val.items():
lines = metric if type(metric) is list else metric.splitlines()
if isinstance(metric, dict):
lines = list(metric.values())
elif isinstance(metric, list):
lines = metric
else:
lines = metric.splitlines()

if len(lines) > 1:
logger.info("\t{fname}:".format(fname=fname))
Expand Down Expand Up @@ -100,6 +105,59 @@ def run(self):
return 0


def _show_diff(diff):
    """Render a metrics diff as a borderless plain-text table.

    ``diff`` maps a metric file path to a mapping of metric name to a change
    record. Each record must provide "new"; "diff" is optional and replaced
    by the text "diff not supported" when absent.

    Returns the drawn table, or "No changes." when ``diff`` is empty.
    """
    from texttable import Texttable

    if not diff:
        return "No changes."

    header = ["Path", "Metric", "Value", "Change"]
    body = [
        [path, name, change["new"], change.get("diff", "diff not supported")]
        for path, metrics in diff.items()
        for name, change in metrics.items()
    ]

    table = Texttable()
    # No border characters and no decorations, so cells are easy to copy.
    table.set_chars(("", "", "", ""))
    table.set_deco(0)
    table.add_rows([header] + body)
    return table.draw()


class CmdMetricsDiff(CmdBase):
    """CLI command that logs the metrics diff between ``a_ref`` and ``b_ref``."""

    def run(self):
        """Compute the diff and log it as JSON or as a text table.

        Returns 0 on success and 1 when a DvcException is raised.
        """
        args = self.args
        try:
            diff = self.repo.metrics.diff(
                a_ref=args.a_ref,
                b_ref=args.b_ref,
                targets=args.targets,
                typ=args.type,
                xpath=args.xpath,
                recursive=args.recursive,
            )

            if args.show_json:
                # Imported lazily: only needed for the --show-json output.
                import json

                output = json.dumps(diff)
            else:
                output = _show_diff(diff)
            logger.info(output)
        except DvcException:
            logger.exception("failed to show metrics diff")
            return 1

        return 0


def add_parser(subparsers, parent_parser):
METRICS_HELP = "Commands to add, manage, collect and display metrics."

Expand Down Expand Up @@ -214,3 +272,65 @@ def add_parser(subparsers, parent_parser):
)
metrics_remove_parser.add_argument("path", help="Path to a metric file.")
metrics_remove_parser.set_defaults(func=CmdMetricsRemove)

METRICS_DIFF_HELP = "Output metric values."
metrics_diff_parser = metrics_subparsers.add_parser(
"diff",
parents=[parent_parser],
description=append_doc_link(METRICS_DIFF_HELP, "metrics/diff"),
help=METRICS_DIFF_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
metrics_diff_parser.add_argument(
"a_ref",
nargs="?",
help=(
"Git reference from which diff is calculated. "
"If omitted `HEAD`(latest commit) is used."
),
)
metrics_diff_parser.add_argument(
"b_ref",
nargs="?",
help=(
"Git reference to which diff is calculated. "
"If omitted current working tree is used."
),
)
metrics_diff_parser.add_argument(
"--targets",
nargs="*",
help=(
"Metric files or directories (see -R) to show diff for. "
"Shows diff for all metric files by default."
),
)
metrics_diff_parser.add_argument(
"-t",
"--type",
help=(
"Type of metrics (json/tsv/htsv/csv/hcsv). "
"It can be detected by the file extension automatically. "
"Unsupported types will be treated as raw."
),
)
metrics_diff_parser.add_argument(
"-x", "--xpath", help="json/tsv/htsv/csv/hcsv path."
)
metrics_diff_parser.add_argument(
"-R",
"--recursive",
action="store_true",
default=False,
help=(
"If any target is a directory, recursively search and process "
"metric files."
),
)
metrics_diff_parser.add_argument(
"--show-json",
action="store_true",
default=False,
help="Show output in JSON format.",
)
metrics_diff_parser.set_defaults(func=CmdMetricsDiff)
6 changes: 3 additions & 3 deletions dvc/repo/brancher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def brancher( # noqa: E302
self, all_branches=False, all_tags=False, all_commits=False
self, revs=None, all_branches=False, all_tags=False, all_commits=False
):
"""Generator that iterates over specified revisions.
Expand All @@ -20,12 +20,12 @@ def brancher( # noqa: E302
- empty string if there are no branches to iterate over
- "Working Tree" if there are uncommitted changes in the SCM repo
"""
if not any([all_branches, all_tags, all_commits]):
if not any([revs, all_branches, all_tags, all_commits]):
yield ""
return

saved_tree = self.tree
revs = []
revs = revs or []

scm = self.scm

Expand Down
5 changes: 5 additions & 0 deletions dvc/repo/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,8 @@ def remove(self, *args, **kwargs):
from dvc.repo.metrics.remove import remove

return remove(self.repo, *args, **kwargs)

def diff(self, *args, **kwargs):
    """Forward to ``dvc.repo.metrics.diff.diff`` with ``self.repo`` first."""
    # Imported lazily, inside the method rather than at module import time.
    from dvc.repo.metrics.diff import diff as _metrics_diff

    return _metrics_diff(self.repo, *args, **kwargs)
105 changes: 105 additions & 0 deletions dvc/repo/metrics/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import json
from collections import defaultdict

from flatten_dict import flatten

from dvc.exceptions import NoMetricsError


def _parse(raw):
    """Return ``raw`` decoded from JSON when it is a JSON string.

    Values that are already a dict, list, int or float are returned as-is.
    Anything else must be a ``str``: it is decoded with ``json.loads`` and,
    if it is not valid JSON, the original string is returned unchanged.
    """
    already_structured = isinstance(raw, (dict, list, int, float))
    if not already_structured:
        assert isinstance(raw, str)
        try:
            raw = json.loads(raw)
        except json.JSONDecodeError:
            # Not JSON: keep the raw text so it can be compared verbatim.
            pass
    return raw


def _diff_vals(old, new):
    """Describe the change between two metric values.

    Single-element lists on both sides are unwrapped (repeatedly) and their
    elements compared instead. Returns an empty dict when the values are
    equal; otherwise a dict with "old" and "new", plus "diff" (``new - old``)
    when both values are int/float.
    """
    # Peel matching one-element list wrappers off both sides.
    while (
        isinstance(old, list)
        and isinstance(new, list)
        and len(old) == 1
        and len(new) == 1
    ):
        old, new = old[0], new[0]

    if old == new:
        return {}

    numeric = (int, float)
    change = {"old": old, "new": new}
    if isinstance(old, numeric) and isinstance(new, numeric):
        change["diff"] = new - old
    return change


# dot_reducer is not released yet (flatten-dict > 0.2.0)
def _dot(k1, k2):
    """Join a parent key and a child key with a dot.

    When there is no parent (``k1 is None``) the child key is returned as-is.
    """
    return k2 if k1 is None else "{0}.{1}".format(k1, k2)


def _diff_dicts(old_dict, new_dict):
    """Diff two metric values key by key, where at least one may be a dict.

    A dict side is flattened with dot-joined keys. A non-dict side contributes
    no keys, and "unable to parse" is used as its value for every key of the
    other side. A key missing from a dict side is compared as ``None``.

    Returns a dict mapping each changed flattened key to its change record.
    """

    def _prepare(value):
        """Return (flattened mapping, fallback value for missing keys)."""
        if isinstance(value, dict):
            return flatten(value, reducer=_dot), None
        # Only .keys() and .get() are used on this mapping below, and neither
        # triggers the default factory, so it acts as an empty mapping here.
        return defaultdict(lambda: "not a dict"), "unable to parse"

    new, new_default = _prepare(new_dict)
    old, old_default = _prepare(old_dict)

    xpaths = set(old.keys())
    xpaths.update(set(new.keys()))

    changes = {}
    for xpath in xpaths:
        val_diff = _diff_vals(
            old.get(xpath, old_default), new.get(xpath, new_default)
        )
        if val_diff:
            changes[xpath] = val_diff
    return changes


def _diff(old_raw, new_raw):
    """Return the changes between two raw values of one metric file.

    Either argument may be ``None``, meaning the metric file is absent on
    that side. Non-``None`` values are parsed with ``_parse`` first.

    Returns a dict mapping a metric key ("" for a non-dict metric) to its
    change record, or an empty dict when nothing changed.
    """
    old = None if old_raw is None else _parse(old_raw)
    new = None if new_raw is None else _parse(new_raw)

    if isinstance(new, dict) or isinstance(old, dict):
        # An absent side is compared as an empty mapping, so every key of the
        # other side is reported against ``None`` instead of a parse error.
        return _diff_dicts(
            {} if old is None else old, {} if new is None else new
        )

    val_diff = _diff_vals(old, new)
    # Wrap real changes only: {"": {}} is truthy and would be reported as a
    # change whose record has no "new" key.
    return {"": val_diff} if val_diff else {}


def _get_metrics(repo, *args, rev=None, **kwargs):
    """Return the metrics of one revision as reported by ``repo.metrics.show``.

    ``rev`` is passed as ``revs=[rev]``; when it is falsy, ``revs=None`` is
    passed and the "" entry of the result is used. Returns an empty dict when
    ``NoMetricsError`` is raised.
    """
    try:
        metrics = repo.metrics.show(
            *args, **kwargs, revs=[rev] if rev else None
        )
        return metrics[rev or ""]
    except NoMetricsError:
        return {}


def diff(repo, *args, a_ref=None, b_ref=None, **kwargs):
    """Compute metric changes from ``a_ref`` to ``b_ref``.

    ``a_ref`` defaults to "HEAD". Extra positional and keyword arguments are
    forwarded to ``repo.metrics.show`` for both sides.

    Returns a dict mapping each metric path that changed to the result of
    ``_diff`` for that path. Paths present on only one side are included.
    """
    old = _get_metrics(repo, *args, **kwargs, rev=(a_ref or "HEAD"))
    new = _get_metrics(repo, *args, **kwargs, rev=b_ref)

    res = {}
    # Sorted so the order of the reported paths is stable between runs.
    for path in sorted(set(old) | set(new)):
        # .get(): a metric file may exist on only one side of the comparison.
        path_diff = _diff(old.get(path), new.get(path))
        if path_diff:
            res[path] = path_diff
    return res
7 changes: 5 additions & 2 deletions dvc/repo/metrics/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

def _read_metric_json(fd, json_path):
parser = parse(json_path)
return [x.value for x in parser.find(json.load(fd))]
return {str(x.full_path): x.value for x in parser.find(json.load(fd))}


def _get_values(row):
Expand Down Expand Up @@ -266,6 +266,7 @@ def show(
all_branches=False,
all_tags=False,
recursive=False,
revs=None,
):
res = {}
found = set()
Expand All @@ -274,7 +275,9 @@ def show(
# Iterate once to call `_collect_metrics` on all the stages
targets = [None]

for branch in repo.brancher(all_branches=all_branches, all_tags=all_tags):
for branch in repo.brancher(
revs=revs, all_branches=all_branches, all_tags=all_tags
):
metrics = {}

for target in targets:
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def run(self):
"win-unicode-console>=0.5; sys_platform == 'win32'",
"pywin32>=225; sys_platform == 'win32'",
"networkx>=2.1,<2.4",
"flatten-dict>=0.2.0",
"texttable>=0.5.2",
]


Expand Down
Loading

0 comments on commit e393e29

Please sign in to comment.