Skip to content

Commit

Permalink
Metrics - plotting for multiple revisions initial (#3577)
Browse files Browse the repository at this point in the history
* init

* rename to plot data insertion based on dicts update

* revision support

* roll back revision

* plot makedirs for backward compatibility

* log path

* pretty plot link to visualization page

* make target default title

* efiop review

* efiop review

* plot multiple initial

* add some missing metric file tests

* proper id generation

* proper id generation

* add confusion matrix template

* refactor tests

* plot from dvct file

* plot from dvct

* brush up commands

* fix confusion matrix multiple plot

* plot: change confusion matrix data schema

* should be working as intended

* support for src file in dvct files

* minor fixes

* plot: support json templates

* plot: rename confusion template

* plot: polish command behaviour

* fix test for json

* plot: test command

* some minor fixes for tests

* plot: unit test loading

* plot: unit test loading

* plot: handle TODOS

* cleanup

* use mocker

* plot: support tsv

* plot: command refactoring

* plot: fix windows issues with tests

* plot: test: some more windows fixes

* plot: _load_from_revisions complexity fix

* plot: reduce complexity

* plot: complexity reduction

* plot: deepsource suggestions

* plot: move template path evaluation

* fixup

* fixup

* exception on no datafile and no template

* json metric load with OrderedDict

* plot: improve handling non-existing files on revisions

* plot: improve handling non-existing files on revisions

* change default plot path

* some exceptions and fixes

* add yaml metrics support

* fixup

* some more suggestions

* default filename fix

* efiop review requests

* log exception on failure

* move revisions deduction to commands

* json templates

* extract template filling to separate method

* some parsing improvements

* add columns functionality

* extract default data transformation to separate method

* plot: initial support for jsonpath

* plot: rename columns to filters, tests are dict based

* plot: fixups

* plot: refactoring

* repo: plot: convert to package

* plot: data loading refactor, support searching for data

* plot: raise if wrong fields provided

* plot: command description

* plot: default: pass y axis info for default plot

* plot: get rid of fieldnames, expect ordered data

* plot: handle default plot in separate method

* plot: fix default

* plot: command option names fixes

* refactoring

* fixes

* plot: provide option for stdout redirection

* plot: rename show-json to no-html

* plot: add no-csv-header option

* plot: improve error message for wrongly structured metric

* plot: match template name exactly, with suffix appended only

* plot: dmpetrov and ivan review

* plot: refactor --stdout help message

* plot: move template to repo/plot

* plot: add -x and -y options

* plot: add -x and -y options

* plot: command: order change

* plot: scatter

* plot: rename confusion matrix template, new name generation format

* plot: add title anchor

* plot: review from jorgeorpinel

* plot: rename filter and result options to select and file

* plot: add --title, --x-title, --y-title

* plot: xlab ylab

* Update dvc/repo/plot/template.py

Co-authored-by: Ruslan Kuprieiev <[email protected]>

* Update dvc/repo/plot/template.py

Co-authored-by: Ruslan Kuprieiev <[email protected]>

* efiop review

* plot: bash completion

* plot: static code analysis fixes

Co-authored-by: Ruslan Kuprieiev <[email protected]>
  • Loading branch information
pared and efiop authored May 1, 2020
1 parent 827c994 commit e553511
Show file tree
Hide file tree
Showing 12 changed files with 1,662 additions and 2 deletions.
2 changes: 2 additions & 0 deletions dvc/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
update,
version,
git_hook,
plot,
)
from .command.base import fix_subparsers
from .exceptions import DvcParserError
Expand Down Expand Up @@ -74,6 +75,7 @@
version,
update,
git_hook,
plot,
]


Expand Down
243 changes: 243 additions & 0 deletions dvc/command/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
import argparse
import logging
import os

from dvc.command.base import append_doc_link, CmdBase, fix_subparsers
from dvc.exceptions import DvcException
from dvc.repo.plot.data import WORKSPACE_REVISION_NAME

logger = logging.getLogger(__name__)


class CmdPLot(CmdBase):
    """Base command shared by the ``plot show`` and ``plot diff`` commands.

    Subclasses supply the revisions to plot by overriding ``_revisions``;
    everything else (argument translation, output naming, writing the
    result) is handled here.
    """

    def _revisions(self):
        """Return the revisions to plot; must be overridden by subclasses."""
        raise NotImplementedError

    def _result_file(self):
        """Return the output path.

        An explicit ``--file`` always wins; otherwise the name is derived
        from the datafile (or ``"plot"``) plus the result extension.
        """
        if self.args.file:
            return self.args.file

        return self._result_basename() + self._result_extension()

    def _result_basename(self):
        """Return the datafile path if one was given, else ``"plot"``."""
        if self.args.datafile:
            return self.args.datafile
        return "plot"

    def _result_extension(self):
        """Return the extension (with leading dot) of the generated file.

        ``.html`` unless ``--no-html`` was passed; in that case the
        template's extension is used, defaulting to ``.json``.
        """
        if not self.args.no_html:
            return ".html"
        if self.args.template:
            # A template referenced by bare name has no suffix. Returning
            # "" here would make the result path equal to the datafile
            # path and overwrite the data, so fall back to ".json".
            return os.path.splitext(self.args.template)[-1] or ".json"
        return ".json"

    def run(self):
        """Build the plot and print it or write it to the result file.

        Returns:
            0 on success, 1 if a ``DvcException`` was raised while
            resolving revisions or building the plot.
        """
        fields = None
        jsonpath = None
        if self.args.select:
            # A leading "$" marks a JSONPath expression; anything else is
            # a comma-separated list of field names.
            if self.args.select.startswith("$"):
                jsonpath = self.args.select
            else:
                fields = set(self.args.select.split(","))

        try:
            plot_string = self.repo.plot(
                datafile=self.args.datafile,
                template=self.args.template,
                revisions=self._revisions(),
                fields=fields,
                x_field=self.args.x,
                y_field=self.args.y,
                path=jsonpath,
                embed=not self.args.no_html,
                csv_header=not self.args.no_csv_header,
                title=self.args.title,
                x_title=self.args.xlab,
                y_title=self.args.ylab,
            )
        except DvcException:
            logger.exception("")
            return 1

        if self.args.stdout:
            logger.info(plot_string)
            return 0

        result_path = self._result_file()
        with open(result_path, "w") as fobj:
            fobj.write(plot_string)

        # open() resolves a relative path against the current working
        # directory, so the reported link must do the same; joining with
        # the repo root points at the wrong file when run from a subdir.
        logger.info("file://{}".format(os.path.abspath(result_path)))
        return 0


class CmdPlotShow(CmdPLot):
    """Command class registered for the ``plot show`` subcommand."""

    def _revisions(self):
        # No explicit revisions are requested; None is forwarded as the
        # ``revisions`` argument of ``repo.plot`` by the base ``run``.
        return None


class CmdPlotDiff(CmdPLot):
    """Command class registered for the ``plot diff`` subcommand."""

    def _revisions(self):
        """Return the list of revisions to compare.

        * two or more revisions given: returned as-is;
        * one revision given: that revision plus the workspace;
        * none given: the workspace, preceded by ``HEAD`` when the SCM
          reports the repository as dirty.
        """
        # Work on a copy: appending to ``self.args.revisions`` directly
        # would mutate the parsed CLI arguments as a side effect.
        revisions = list(self.args.revisions or [])
        if len(revisions) <= 1:
            if not revisions and self.repo.scm.is_dirty():
                revisions.append("HEAD")
            revisions.append(WORKSPACE_REVISION_NAME)
        return revisions


def _add_template_argument(parser):
    """Register the ``-t/--template`` option on ``parser``."""
    parser.add_argument(
        "-t",
        "--template",
        nargs="?",
        default=None,
        help="File to be injected with data.",
    )


def _add_common_arguments(parser):
    """Register the options shared by ``plot show`` and ``plot diff``."""
    parser.add_argument(
        "-f", "--file", default=None, help="Name of the generated file."
    )
    parser.add_argument(
        "-s",
        "--select",
        default=None,
        help="Choose which field(s) or JSONPath to include in the plot.",
    )
    parser.add_argument("-x", default=None, help="Field name for x axis.")
    parser.add_argument("-y", default=None, help="Field name for y axis.")
    parser.add_argument(
        "--stdout",
        action="store_true",
        default=False,
        help="Print plot specification to stdout.",
    )
    parser.add_argument(
        "--no-csv-header",
        action="store_true",
        default=False,
        help="Required when CSV or TSV datafile does not have a header.",
    )
    parser.add_argument(
        "--no-html",
        action="store_true",
        default=False,
        help="Do not wrap Vega plot JSON with HTML.",
    )
    parser.add_argument("--title", default=None, help="Plot title.")
    parser.add_argument("--xlab", default=None, help="X axis title.")
    parser.add_argument("--ylab", default=None, help="Y axis title.")


def add_parser(subparsers, parent_parser):
    """Register the ``plot`` command and its ``show``/``diff`` subcommands.

    Args:
        subparsers: the argparse subparsers object of the main parser.
        parent_parser: parser whose arguments every (sub)command inherits.
    """
    PLOT_HELP = (
        "Generating plots for continuous metrics stored in structured files "
        "(JSON, CSV, TSV)."
    )

    plot_parser = subparsers.add_parser(
        "plot",
        parents=[parent_parser],
        description=append_doc_link(PLOT_HELP, "plot"),
        help=PLOT_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    plot_subparsers = plot_parser.add_subparsers(
        dest="cmd",
        help="Use `dvc plot CMD --help` to display command-specific help.",
    )

    fix_subparsers(plot_subparsers)

    SHOW_HELP = "Generate a plot image file from a continuous metrics file."
    plot_show_parser = plot_subparsers.add_parser(
        "show",
        parents=[parent_parser],
        description=append_doc_link(SHOW_HELP, "plot/show"),
        help=SHOW_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    _add_template_argument(plot_show_parser)
    _add_common_arguments(plot_show_parser)
    # ``show`` takes the datafile as an optional positional argument.
    plot_show_parser.add_argument(
        "datafile",
        nargs="?",
        default=None,
        help="Continuous metrics file to visualize.",
    )
    plot_show_parser.set_defaults(func=CmdPlotShow)

    PLOT_DIFF_HELP = (
        "Plot continuous metrics differences between commits in the DVC "
        "repository, or between the last commit and the workspace."
    )
    plot_diff_parser = plot_subparsers.add_parser(
        "diff",
        parents=[parent_parser],
        description=append_doc_link(PLOT_DIFF_HELP, "plot/diff"),
        help=PLOT_DIFF_HELP,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    _add_template_argument(plot_diff_parser)
    # ``diff`` reserves positionals for revisions, so the datafile is an
    # option here.
    plot_diff_parser.add_argument(
        "-d",
        "--datafile",
        nargs="?",
        default=None,
        help="Continuous metrics file to visualize.",
    )
    _add_common_arguments(plot_diff_parser)
    plot_diff_parser.add_argument(
        "revisions",
        nargs="*",
        default=None,
        help="Git revisions to plot from",
    )
    plot_diff_parser.set_defaults(func=CmdPlotDiff)
7 changes: 7 additions & 0 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class Repo(object):
from dvc.repo.get import get
from dvc.repo.get_url import get_url
from dvc.repo.update import update
from dvc.repo.plot import plot

def __init__(self, root_dir=None):
from dvc.state import State
Expand Down Expand Up @@ -426,6 +427,12 @@ def stages(self):
"""
return self._collect_stages()

@cached_property
def plot_templates(self):
    """Return a ``PlotTemplates`` object built from this repo's ``dvc_dir``."""
    # Imported inside the method, as in the original.
    from dvc.repo.plot.template import PlotTemplates

    templates = PlotTemplates(self.dvc_dir)
    return templates

def _collect_stages(self):
from dvc.dvcfile import Dvcfile, is_valid_filename

Expand Down
2 changes: 1 addition & 1 deletion dvc/repo/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def init(root_dir=os.curdir, no_scm=False, force=False, subdir=False):

proj = Repo(root_dir)

scm.add([config.files["repo"]])
scm.add([config.files["repo"], proj.plot_templates.templates_dir])

if scm.ignore_file:
scm.add([os.path.join(dvc_dir, scm.ignore_file)])
Expand Down
Loading

0 comments on commit e553511

Please sign in to comment.