Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

metrics: introduce diff #3051

Merged
merged 1 commit into from
Jan 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 121 additions & 1 deletion dvc/command/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@ def show_metrics(metrics, all_branches=False, all_tags=False):
logger.info("{branch}:".format(branch=branch))

for fname, metric in val.items():
lines = metric if type(metric) is list else metric.splitlines()
if isinstance(metric, dict):
lines = list(metric.values())
elif isinstance(metric, list):
lines = metric
else:
lines = metric.splitlines()

if len(lines) > 1:
logger.info("\t{fname}:".format(fname=fname))
Expand Down Expand Up @@ -100,6 +105,59 @@ def run(self):
return 0


def _show_diff(diff):
from texttable import Texttable

if not diff:
return "No changes."

table = Texttable()

# remove borders to make it easier for users to copy stuff
table.set_chars(("", "", "", ""))
table.set_deco(0)

rows = [["Path", "Metric", "Value", "Change"]]
Copy link
Contributor Author

@efiop efiop Jan 14, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dmpetrov asked for this format specifically, so don't be surprised that we don't show old value here :)

for fname, mdiff in diff.items():
for metric, change in mdiff.items():
rows.append(
[
fname,
metric,
change["new"],
change.get("diff", "diff not supported"),
]
)
table.add_rows(rows)
return table.draw()


class CmdMetricsDiff(CmdBase):
    """Command that reports how metrics changed between two references."""

    def run(self):
        """Compute the metrics diff and log it as JSON or as a table.

        Returns:
            0 on success, 1 if a DvcException was raised along the way.
        """
        args = self.args
        try:
            changes = self.repo.metrics.diff(
                a_ref=args.a_ref,
                b_ref=args.b_ref,
                targets=args.targets,
                typ=args.type,
                xpath=args.xpath,
                recursive=args.recursive,
            )

            if not args.show_json:
                rendered = _show_diff(changes)
            else:
                import json

                rendered = json.dumps(changes)

            logger.info(rendered)
        except DvcException:
            logger.exception("failed to show metrics diff")
            return 1

        return 0


def add_parser(subparsers, parent_parser):
METRICS_HELP = "Commands to add, manage, collect and display metrics."

Expand Down Expand Up @@ -214,3 +272,65 @@ def add_parser(subparsers, parent_parser):
)
metrics_remove_parser.add_argument("path", help="Path to a metric file.")
metrics_remove_parser.set_defaults(func=CmdMetricsRemove)

METRICS_DIFF_HELP = "Output metric values."
metrics_diff_parser = metrics_subparsers.add_parser(
"diff",
parents=[parent_parser],
description=append_doc_link(METRICS_DIFF_HELP, "metrics/diff"),
help=METRICS_DIFF_HELP,
formatter_class=argparse.RawDescriptionHelpFormatter,
)
metrics_diff_parser.add_argument(
"a_ref",
nargs="?",
help=(
"Git reference from which diff is calculated. "
"If omitted `HEAD`(latest commit) is used."
),
)
metrics_diff_parser.add_argument(
"b_ref",
nargs="?",
help=(
"Git reference to which diff is calculated. "
"If omitted current working tree is used."
),
)
metrics_diff_parser.add_argument(
"--targets",
nargs="*",
help=(
"Metric files or directories (see -R) to show diff for. "
"Shows diff for all metric files by default."
),
)
metrics_diff_parser.add_argument(
"-t",
"--type",
help=(
"Type of metrics (json/tsv/htsv/csv/hcsv). "
"It can be detected by the file extension automatically. "
"Unsupported types will be treated as raw."
),
)
metrics_diff_parser.add_argument(
"-x", "--xpath", help="json/tsv/htsv/csv/hcsv path."
)
metrics_diff_parser.add_argument(
"-R",
"--recursive",
action="store_true",
default=False,
help=(
"If any target is a directory, recursively search and process "
"metric files."
),
)
metrics_diff_parser.add_argument(
"--show-json",
action="store_true",
default=False,
help="Show output in JSON format.",
)
metrics_diff_parser.set_defaults(func=CmdMetricsDiff)
6 changes: 3 additions & 3 deletions dvc/repo/brancher.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def brancher( # noqa: E302
self, all_branches=False, all_tags=False, all_commits=False
self, revs=None, all_branches=False, all_tags=False, all_commits=False
):
"""Generator that iterates over specified revisions.

Expand All @@ -20,12 +20,12 @@ def brancher( # noqa: E302
- empty string if there are no branches to iterate over
- "Working Tree" if there are uncommitted changes in the SCM repo
"""
if not any([all_branches, all_tags, all_commits]):
if not any([revs, all_branches, all_tags, all_commits]):
yield ""
return

saved_tree = self.tree
revs = []
revs = revs or []

scm = self.scm

Expand Down
5 changes: 5 additions & 0 deletions dvc/repo/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,8 @@ def remove(self, *args, **kwargs):
from dvc.repo.metrics.remove import remove

return remove(self.repo, *args, **kwargs)

def diff(self, *args, **kwargs):
    """Run the metrics diff implementation against this object's repo.

    Every positional and keyword argument is forwarded unchanged, after
    ``self.repo``, to the ``diff`` function of the sibling ``diff`` module.
    """
    # Imported at call time, as the neighbouring wrappers do.
    from .diff import diff as compute_diff

    return compute_diff(self.repo, *args, **kwargs)
105 changes: 105 additions & 0 deletions dvc/repo/metrics/diff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import json
from collections import defaultdict

from flatten_dict import flatten

from dvc.exceptions import NoMetricsError


def _parse(raw):
if isinstance(raw, (dict, list, int, float)):
return raw

assert isinstance(raw, str)
try:
return json.loads(raw)
except json.JSONDecodeError:
return raw


def _diff_vals(old, new):
if (
isinstance(new, list)
and isinstance(old, list)
and len(old) == len(new) == 1
):
return _diff_vals(old[0], new[0])

if old == new:
return {}

res = {"old": old, "new": new}
if isinstance(new, (int, float)) and isinstance(old, (int, float)):
res["diff"] = new - old
return res


# dot_reducer is not released yet (flatten-dict > 0.2.0)
def _dot(k1, k2):
if k1 is None:
return k2
return "{0}.{1}".format(k1, k2)


def _diff_dicts(old_dict, new_dict):
old_default = None
new_default = None

if isinstance(new_dict, dict):
new = flatten(new_dict, reducer=_dot)
else:
new = defaultdict(lambda: "not a dict")
new_default = "unable to parse"

if isinstance(old_dict, dict):
old = flatten(old_dict, reducer=_dot)
else:
old = defaultdict(lambda: "not a dict")
old_default = "unable to parse"

res = defaultdict(dict)

xpaths = set(old.keys())
xpaths.update(set(new.keys()))
for xpath in xpaths:
old_val = old.get(xpath, old_default)
new_val = new.get(xpath, new_default)
val_diff = _diff_vals(old_val, new_val)
if val_diff:
res[xpath] = val_diff
return dict(res)


def _diff(old_raw, new_raw):
old = _parse(old_raw)
new = _parse(new_raw)

if isinstance(new, dict) or isinstance(old, dict):
return _diff_dicts(old, new)

return {"": _diff_vals(old, new)}


def _get_metrics(repo, *args, rev=None, **kwargs):
try:
metrics = repo.metrics.show(
*args, **kwargs, revs=[rev] if rev else None
)
return metrics[rev or ""]
except NoMetricsError:
return {}


def diff(repo, *args, a_ref=None, b_ref=None, **kwargs):
old = _get_metrics(repo, *args, **kwargs, rev=(a_ref or "HEAD"))
new = _get_metrics(repo, *args, **kwargs, rev=b_ref)

paths = set(old.keys())
paths.update(set(new.keys()))

res = defaultdict(dict)
for path in paths:
path_diff = _diff(old[path], new[path])
if path_diff:
res[path] = path_diff
return dict(res)
7 changes: 5 additions & 2 deletions dvc/repo/metrics/show.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

def _read_metric_json(fd, json_path):
parser = parse(json_path)
return [x.value for x in parser.find(json.load(fd))]
return {str(x.full_path): x.value for x in parser.find(json.load(fd))}


def _get_values(row):
Expand Down Expand Up @@ -266,6 +266,7 @@ def show(
all_branches=False,
all_tags=False,
recursive=False,
revs=None,
):
res = {}
found = set()
Expand All @@ -274,7 +275,9 @@ def show(
# Iterate once to call `_collect_metrics` on all the stages
targets = [None]

for branch in repo.brancher(all_branches=all_branches, all_tags=all_tags):
for branch in repo.brancher(
revs=revs, all_branches=all_branches, all_tags=all_tags
):
metrics = {}

for target in targets:
Expand Down
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def run(self):
"win-unicode-console>=0.5; sys_platform == 'win32'",
"pywin32>=225; sys_platform == 'win32'",
"networkx>=2.1,<2.4",
"flatten-dict>=0.2.0",
"texttable>=0.5.2",
]


Expand Down
Loading