From fbb98a87d1ba0ddbe2ab05d4e60f4271099d9a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Redzy=C5=84ski?= Date: Mon, 31 Jan 2022 14:38:47 +0100 Subject: [PATCH] plots: introduce flexible plots configuration to dvcfiles Closes: #7086 --- dvc/commands/plots.py | 159 +++++-- dvc/output.py | 2 +- dvc/render/__init__.py | 1 + dvc/render/convert.py | 18 +- dvc/render/converter/__init__.py | 9 + .../image.py} | 11 +- dvc/render/converter/vega.py | 316 ++++++++++++++ dvc/render/match.py | 128 ++++-- dvc/render/vega_converter.py | 183 -------- dvc/repo/plots/__init__.py | 353 ++++++++++++--- dvc/schema.py | 3 + dvc/utils/plots.py | 2 + setup.cfg | 1 + tests/func/plots/test_diff.py | 57 +-- tests/func/plots/test_modify.py | 13 +- tests/func/plots/test_show.py | 158 +++++-- tests/func/test_live.py | 11 +- tests/integration/plots/conftest.py | 108 +++++ tests/integration/plots/test_json.py | 281 ------------ tests/integration/plots/test_plots.py | 411 ++++++++++++++++++ .../integration/plots/test_repo_plots_api.py | 113 ++--- tests/unit/command/test_plots.py | 43 +- tests/unit/render/test_convert.py | 158 +------ tests/unit/render/test_image_converter.py | 2 +- tests/unit/render/test_match.py | 247 ++++------- tests/unit/render/test_vega_converter.py | 167 ++++++- tests/unit/test_plots.py | 34 -- tests/utils/plots.py | 11 + 28 files changed, 1884 insertions(+), 1116 deletions(-) create mode 100644 dvc/render/converter/__init__.py rename dvc/render/{image_converter.py => converter/image.py} (85%) create mode 100644 dvc/render/converter/vega.py delete mode 100644 dvc/render/vega_converter.py create mode 100644 dvc/utils/plots.py delete mode 100644 tests/integration/plots/test_json.py create mode 100644 tests/integration/plots/test_plots.py delete mode 100644 tests/unit/test_plots.py create mode 100644 tests/utils/plots.py diff --git a/dvc/commands/plots.py b/dvc/commands/plots.py index 8caa782e6c..7a5c1570c6 100644 --- a/dvc/commands/plots.py +++ b/dvc/commands/plots.py @@ -24,6 +24,60 @@ def _show_json(renderers, split=False): ui.write_json(result) +def _adjust_vega_renderers(renderers): + from dvc.render import VERSION_FIELD + from dvc_render import VegaRenderer + + for r in renderers: + if isinstance(r, VegaRenderer): + if _data_versions_count(r) > 1: + summary = _summarize_version_infos(r) + for dp in r.datapoints: + vi = dp.pop(VERSION_FIELD, {}) + keys = list(vi.keys()) + for key in keys: + if not (len(summary.get(key, set())) > 1): + vi.pop(key) + if vi: + dp["rev"] = "::".join(vi.values()) + else: + for dp in r.datapoints: + dp.pop(VERSION_FIELD, {}) + + +def _summarize_version_infos(renderer): + from collections import defaultdict + + from dvc.render import VERSION_FIELD + + result = defaultdict(set) + + for dp in renderer.datapoints: + for key, value in dp.get(VERSION_FIELD, {}).items(): + result[key].add(value) + return dict(result) + + +def _data_versions_count(renderer): + from itertools import product + + summary = _summarize_version_infos(renderer) + x = product(summary.get("filename", {None}), summary.get("field", {None})) + return len(set(x)) + + +def _filter_unhandled_renderers(renderers): + # filtering out renderers currently unhandled by vscode extension + from dvc_render import VegaRenderer + + def _is_json_viable(r): + return not ( + isinstance(r, VegaRenderer) and _data_versions_count(r) > 1 + ) + + return list(filter(_is_json_viable, renderers)) + + class CmdPlots(CmdBase): def _func(self, *args, **kwargs): raise NotImplementedError @@ -35,10 +89,28 @@ def _props(self): props = {p: getattr(self.args, p) for p in PLOT_PROPS} return {k: v for k, v in props.items() if v is not None} + def _config_files(self): + config_files = None + if self.args.from_config: + config_files = {self.args.from_config} + return config_files + + def _html_template_path(self): + html_template_path = self.args.html_template + if not html_template_path: + html_template_path = self.repo.config.get("plots", {}).get( + "html_template", None + ) + if html_template_path and not os.path.isabs(html_template_path): + html_template_path = os.path.join( + self.repo.dvc_dir, html_template_path + ) + return html_template_path + def run(self): from pathlib import Path - from dvc.render.match import match_renderers + from dvc.render.match import match_defs_renderers from dvc_render import render_html if self.args.show_vega: @@ -58,9 +130,10 @@ def run(self): return 1 try: - plots_data = self._func( - targets=self.args.targets, props=self._props() + targets=self.args.targets, + props=self._props(), + config_files=self._config_files(), ) if not plots_data: @@ -76,51 +149,43 @@ def run(self): renderers_out = ( out if self.args.json else os.path.join(out, "static") ) - renderers = match_renderers( - plots_data=plots_data, + + renderers = match_defs_renderers( + data=plots_data, out=renderers_out, templates_dir=self.repo.plots.templates_dir, ) - if self.args.show_vega: renderer = first(filter(lambda r: r.TYPE == "vega", renderers)) if renderer: ui.write_json(json.loads(renderer.get_filled_template())) return 0 if self.args.json: + renderers = _filter_unhandled_renderers(renderers) _show_json(renderers, self.args.split) return 0 - html_template_path = self.args.html_template - if not html_template_path: - html_template_path = self.repo.config.get("plots", {}).get( - "html_template", None - ) - if html_template_path and not os.path.isabs( - html_template_path - ): - html_template_path = os.path.join( - self.repo.dvc_dir, html_template_path - ) + _adjust_vega_renderers(renderers) output_file: Path = (Path.cwd() / out).resolve() / "index.html" - render_html( - renderers=renderers, - output_file=output_file, - template_path=html_template_path, - ) + if renderers: + render_html( + renderers=renderers, + output_file=output_file, + template_path=self._html_template_path(), + ) - ui.write(output_file.as_uri()) - auto_open = self.repo.config["plots"].get("auto_open", False) - if self.args.open or auto_open: - if not auto_open: - ui.write( - "To enable auto opening, you can run:\n" - "\n" - "\tdvc config plots.auto_open true" - ) - return ui.open_browser(output_file) + ui.write(output_file.as_uri()) + auto_open = self.repo.config["plots"].get("auto_open", False) + if self.args.open or auto_open: + if not auto_open: + ui.write( + "To enable auto opening, you can run:\n" + "\n" + "\tdvc config plots.auto_open true" + ) + return ui.open_browser(output_file) return 0 @@ -188,10 +253,7 @@ def run(self): def add_parser(subparsers, parent_parser): - PLOTS_HELP = ( - "Commands to visualize and compare plot metrics in structured files " - "(JSON, YAML, CSV, TSV)." - ) + PLOTS_HELP = "Commands to visualize and compare plot data." plots_parser = subparsers.add_parser( "plots", @@ -207,7 +269,10 @@ def add_parser(subparsers, parent_parser): fix_subparsers(plots_subparsers) - SHOW_HELP = "Generate plots from metrics files." + SHOW_HELP = ( + "Generate plots from target files or plots definitions from " + "`dvc.yaml` file." + ) plots_show_parser = plots_subparsers.add_parser( "show", parents=[parent_parser], @@ -218,8 +283,8 @@ def add_parser(subparsers, parent_parser): plots_show_parser.add_argument( "targets", nargs="*", - help="Files to visualize (supports any file, " - "even when not found as `plots` in `dvc.yaml`). " + help="Plots to visualize. Supports any file path, or plot name " + "defined in `dvc.yaml`. " "Shows all plots by default.", ).complete = completion.FILE _add_props_arguments(plots_show_parser) @@ -228,7 +293,7 @@ def add_parser(subparsers, parent_parser): plots_show_parser.set_defaults(func=CmdPlotsShow) PLOTS_DIFF_HELP = ( - "Show multiple versions of plot metrics " + "Show multiple versions of plot data " "by plotting them in a single image." ) plots_diff_parser = plots_subparsers.add_parser( @@ -242,8 +307,8 @@ def add_parser(subparsers, parent_parser): "--targets", nargs="*", help=( - "Specific plots file(s) to visualize " - "(even if not found as `plots` in `dvc.yaml`). " + "Specific plots to visualize. " + "Accepts any file path or plot name from `dvc.yaml` file. " "Shows all tracked plots by default." ), metavar="", @@ -264,7 +329,7 @@ def add_parser(subparsers, parent_parser): plots_diff_parser.set_defaults(func=CmdPlotsDiff) PLOTS_MODIFY_HELP = ( - "Modify display properties of data-series plots " + "Modify display properties of data-series plot outputs " "(has no effect on image-type plots)." ) plots_modify_parser = plots_subparsers.add_parser( @@ -275,7 +340,7 @@ def add_parser(subparsers, parent_parser): formatter_class=argparse.RawDescriptionHelpFormatter, ) plots_modify_parser.add_argument( - "target", help="Metric file to set properties to" + "target", help="Plot output to set properties to" ).complete = completion.FILE _add_props_arguments(plots_modify_parser) plots_modify_parser.add_argument( @@ -385,3 +450,9 @@ def _add_ui_arguments(parser): help="Custom HTML template for VEGA visualization.", metavar="", ) + parser.add_argument( + "--from-config", + default=None, + metavar="", + help=argparse.SUPPRESS, + ) diff --git a/dvc/output.py b/dvc/output.py index 9c5bea6d45..74d29c7a7a 100644 --- a/dvc/output.py +++ b/dvc/output.py @@ -1086,7 +1086,7 @@ def is_metric(self) -> bool: @property def is_plot(self) -> bool: - return bool(self.plot) + return bool(self.plot) or bool(self.live) ARTIFACT_SCHEMA = { diff --git a/dvc/render/__init__.py b/dvc/render/__init__.py index d9eded6f3e..bedc1414cf 100644 --- a/dvc/render/__init__.py +++ b/dvc/render/__init__.py @@ -1,6 +1,7 @@ INDEX_FIELD = "step" REVISION_FIELD = "rev" FILENAME_FIELD = "filename" +VERSION_FIELD = "dvc_data_version_info" REVISIONS_KEY = "revisions" TYPE_KEY = "type" SRC_FIELD = "src" diff --git a/dvc/render/convert.py b/dvc/render/convert.py index c93bb16721..9c9b3718e5 100644 --- a/dvc/render/convert.py +++ b/dvc/render/convert.py @@ -3,8 +3,8 @@ from typing import Dict, List, Union from dvc.render import REVISION_FIELD, REVISIONS_KEY, SRC_FIELD, TYPE_KEY -from dvc.render.image_converter import ImageConverter -from dvc.render.vega_converter import VegaConverter +from dvc.render.converter.image import ImageConverter +from dvc.render.converter.vega import VegaConverter def _get_converter( @@ -20,20 +20,6 @@ def _get_converter( raise ValueError(f"Invalid renderer class {renderer_class}") -def to_datapoints(renderer_class, data: Dict, props: Dict): - converter = _get_converter(renderer_class, props) - datapoints: List[Dict] = [] - final_props: Dict = {} - for revision, rev_data in data.items(): - for filename, file_data in rev_data.get("data", {}).items(): - if "data" in file_data: - processed, final_props = converter.convert( - file_data.get("data"), revision, filename - ) - datapoints.extend(processed) - return datapoints, final_props - - def _group_by_rev(datapoints): grouped = defaultdict(list) for datapoint in datapoints: diff --git a/dvc/render/converter/__init__.py b/dvc/render/converter/__init__.py new file mode 100644 index 0000000000..4685b2f758 --- /dev/null +++ b/dvc/render/converter/__init__.py @@ -0,0 +1,9 @@ +from typing import Dict, Optional + + +class Converter: + def __init__(self, plot_properties: Optional[Dict] = None): + self.plot_properties = plot_properties or {} + + def convert(self, data, revision: str, filename: str, **kwargs): + raise NotImplementedError diff --git a/dvc/render/image_converter.py b/dvc/render/converter/image.py similarity index 85% rename from dvc/render/image_converter.py rename to dvc/render/converter/image.py index 4af77e7749..8d3a54581c 100644 --- a/dvc/render/image_converter.py +++ b/dvc/render/converter/image.py @@ -1,17 +1,16 @@ import base64 import os -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple +from typing import TYPE_CHECKING, Dict, List, Tuple from dvc.render import FILENAME_FIELD, REVISION_FIELD, SRC_FIELD +from . import Converter + if TYPE_CHECKING: from dvc.types import StrPath -class ImageConverter: - def __init__(self, plot_properties: Optional[Dict] = None): - self.plot_properties = plot_properties or {} - +class ImageConverter(Converter): @staticmethod def _write_image( path: "StrPath", @@ -36,7 +35,7 @@ def _encode_image( return f"data:image;base64,{base64_str}" def convert( - self, data: bytes, revision, filename + self, data, revision: str, filename: str, **kwargs ) -> Tuple[List[Dict], Dict]: """ Convert the DVC Plots content to DVC Render datapoints. diff --git a/dvc/render/converter/vega.py b/dvc/render/converter/vega.py new file mode 100644 index 0000000000..2cf03bb9bf --- /dev/null +++ b/dvc/render/converter/vega.py @@ -0,0 +1,316 @@ +from copy import deepcopy +from functools import partial +from typing import Dict, Iterable, List, Optional, Set, Union + +from funcy import first, project + +from dvc.exceptions import DvcException +from dvc.render import ( + FILENAME_FIELD, + INDEX_FIELD, + REVISION_FIELD, + VERSION_FIELD, +) + +from . import Converter + + +class FieldsNotFoundError(DvcException): + def __init__(self, expected_fields, found_fields): + expected_str = ", ".join(expected_fields) + found_str = ", ".join(found_fields) + super().__init__( + f"Could not find all provided fields ('{expected_str}') " + f"in data fields ('{found_str}')." + ) + + +class PlotDataStructureError(DvcException): + def __init__(self): + super().__init__( + "Plot data extraction failed. Please see " + "https://man.dvc.org/plots for supported data formats." + ) + + +def _filter_fields( + datapoints: List[Dict], fields: Set, **kwargs +) -> List[Dict]: + if not fields: + return datapoints + assert isinstance(fields, set) + + new_data = [] + for data_point in datapoints: + keys = set(data_point.keys()) + if not fields <= keys: + raise FieldsNotFoundError(fields, keys) + + new_data.append(project(data_point, fields)) + + return new_data + + +def _lists(dictionary: Dict): + for _, value in dictionary.items(): + if isinstance(value, dict): + yield from _lists(value) + elif isinstance(value, list): + yield value + + +def _find_first_list( + data: Union[Dict, List], fields: Set, **kwargs +) -> List[Dict]: + fields = fields or set() + + if not isinstance(data, dict): + return data + + for lst in _lists(data): + if ( + all(isinstance(dp, dict) for dp in lst) + # if fields is empty, it will match any set + and set(first(lst).keys()) & fields == fields + ): + return lst + + raise PlotDataStructureError() + + +def _append_index(datapoints: List[Dict], **kwargs) -> List[Dict]: + if INDEX_FIELD in first(datapoints).keys(): + return datapoints + + for index, data_point in enumerate(datapoints): + data_point[INDEX_FIELD] = index + return datapoints + + +class VegaConverter(Converter): + """ + Class that takes care of converting unspecified data blob + (Dict or List[Dict]) into datapoints (List[Dict]). + If some properties that are required by Template class are missing + ('x', 'y') it will attempt to fill in the blanks. + """ + + def __init__(self, plot_properties: Optional[Dict] = None): + super().__init__(plot_properties) + self.inferred_properties: Dict = {} + + self.steps = [] + + self._infer_x() + self._infer_fields() + + self.steps.append( + ( + "find_data", + partial( + _find_first_list, + fields=self.inferred_properties.get("fields", set()) + - {INDEX_FIELD}, + ), + ) + ) + + if not self.plot_properties.get("x", None): + self.steps.append(("append_index", partial(_append_index))) + + self.steps.append( + ( + "filter_fields", + partial( + _filter_fields, + fields=self.inferred_properties.get("fields", set()), + ), + ) + ) + + self.steps.append( + ( + "generate_y", + partial( + self._generate_y_values, + y_values=self.plot_properties.get("y", None), + ), + ) + ) + + def _infer_x(self): + if not self.plot_properties.get("x", None): + self.inferred_properties["x"] = INDEX_FIELD + + def skip_step(self, name: str): + self.steps = [(_name, fn) for _name, fn in self.steps if _name != name] + + def _infer_fields(self): + fields = self.plot_properties.get("fields", set()) + if fields: + fields = { + *fields, + self.plot_properties.get("x", None), + self.plot_properties.get("y", None), + self.inferred_properties.get("x", None), + } - {None} + self.inferred_properties["fields"] = fields + + def _infer_y(self, datapoints: List[Dict]): + if "y" not in self.plot_properties: + data_fields = list(first(datapoints)) + skip = ( + REVISION_FIELD, + self.plot_properties.get("x", None) + or self.inferred_properties.get("x"), + FILENAME_FIELD, + VERSION_FIELD, + ) + inferred_y = first( + f for f in reversed(data_fields) if f not in skip + ) + if "y" in self.inferred_properties: + previous_y = self.inferred_properties["y"] + if previous_y != inferred_y: + raise DvcException( + f"Inferred y ('{inferred_y}' value does not match" + f"previously matched one ('f{previous_y}')." + ) + else: + self.inferred_properties["y"] = inferred_y + + def convert( + self, + data, + revision: str, + filename: str, + skip: List = None, + **kwargs, + ): + """ + Convert the data. Fill necessary fields ('x', 'y') and return both + generated datapoints and updated properties. + """ + if not skip: + skip = [] + + processed = deepcopy(data) + + for step_name, step in self.steps: + + if step_name not in skip: + processed = step( # type: ignore + processed, + revision=revision, + filename=filename, + ) + + self._infer_y(processed) # type: ignore + return processed, {**self.plot_properties, **self.inferred_properties} + + def _generate_y_values( # noqa: C901 + self, + datapoints: List[Dict], + y_values: Optional[Union[str, List, Dict]], + revision: str, + filename: str, + **kwargs, + ) -> List[Dict]: + + result = [] + properties_update = {} + + def _add_version_info(datapoint, version_info): + tmp = datapoint.copy() + tmp[VERSION_FIELD] = version_info + tmp[REVISION_FIELD] = version_info["revision"] + return tmp + + def _version_info(revision, filename=None, field=None): + res = {"revision": revision} + if filename is not None: + res["filename"] = filename + if field is not None: + res["field"] = field + return res + + def _generate_y(datapoint, field): + tmp = datapoint.copy() + tmp["dvc_inferred_y_value"] = datapoint[field] + tmp = _add_version_info( + tmp, _version_info(revision, filename, field) + ) + if ( + "y_label" not in properties_update + and "y_label" not in self.plot_properties + ): + properties_update["y_label"] = "y" + + properties_update["y"] = "dvc_inferred_y_value" + + return tmp + + if not y_values: + for dp in datapoints: + result.append( + _add_version_info(dp, _version_info(revision, filename)) + ) + + if isinstance(y_values, str): + for datapoint in datapoints: + result.append( + _add_version_info( + datapoint, _version_info(revision, filename, y_values) + ) + ) + + if isinstance(y_values, list): + for datapoint in datapoints: + for y_val in y_values: + if y_val in datapoint: + result.append(_generate_y(datapoint, y_val)) + + if isinstance(y_values, dict): + + def _to_set(values: Iterable): + result = set() + for val in values: + if isinstance(val, list): + for elem in val: + result.add(elem) + else: + result.add(val) + + return result + + all_fields = _to_set(y_values.values()) + if ( + all([isinstance(field, str) for field in all_fields]) + and len(all_fields) == 1 + ): + # if we use the same field from all files, + # we dont have to generate it + for datapoint in datapoints: + result.append( + _add_version_info( + datapoint, _version_info(revision, filename) + ) + ) + properties_update.update({"y": all_fields.pop()}) + else: + for def_filename, val in y_values.items(): + if isinstance(val, str): + fields = [val] + if isinstance(val, list): + fields = val + for datapoint in datapoints: + for field in fields: + if field in datapoint and def_filename in filename: + result.append(_generate_y(datapoint, field)) + + self.inferred_properties = { + **self.inferred_properties, + **properties_update, + } + + return result diff --git a/dvc/render/match.py b/dvc/render/match.py index 7f5f623b09..96c9275b42 100644 --- a/dvc/render/match.py +++ b/dvc/render/match.py @@ -1,58 +1,110 @@ -from typing import TYPE_CHECKING, Dict, Optional +from collections import defaultdict +from typing import TYPE_CHECKING, Dict, List, Optional +import dpath.options import dpath.util +from funcy import last -from dvc_render import RENDERERS +from dvc.repo.plots import infer_data_sources +from dvc.utils.plots import get_plot_id -from .convert import to_datapoints +from .convert import _get_converter if TYPE_CHECKING: from dvc.types import StrPath +dpath.options.ALLOW_EMPTY_STRING_KEYS = True -def group_by_filename(plots_data: Dict) -> Dict: - grouped: Dict[str, Dict] = {} - for revision in plots_data.keys(): - data = plots_data[revision].get("data", {}) - for file in data.keys(): - content = data.get(file) - if content: - dpath.util.new( - grouped, [file, revision, "data", file], content - ) +def _squash_plots_properties(data: List) -> Dict: + configs = [last(group) for group in data] + resolved: Dict = {} + for config in reversed(configs): + resolved = {**resolved, **config} + return resolved - return grouped +class PlotsData: + def __init__(self, data: Dict): + self.data = data -def squash_plots_properties(data: Dict) -> Dict: - resolved: Dict[str, str] = {} - for rev_data in data.values(): - for file_data in rev_data.get("data", {}).values(): - props = file_data.get("props", {}) - resolved = {**resolved, **props} - return resolved + def group_definitions(self): + groups = defaultdict(list) + for rev, rev_content in self.data.items(): + for config_file, config_file_content in ( + rev_content.get("definitions", {}).get("data", {}).items() + ): + for plot_id, plot_definition in config_file_content.get( + "data", {} + ).items(): + full_id = get_plot_id(plot_id, config_file) + groups[full_id].append((rev, plot_id, plot_definition)) + return dict(groups) + + def get_definition_data(self, target_files, rev): + result = [] + for file in target_files: + file_content = ( + self.data.get(rev, {}) + .get("sources", {}) + .get("data", {}) + .get(file, {}) + .get("data", {}) + ) + if file_content: + result.append((file, file_content)) + return result -def match_renderers( - plots_data, - out: Optional["StrPath"] = None, + +def match_defs_renderers( + data, + out=None, templates_dir: Optional["StrPath"] = None, ): + + from dvc_render import ImageRenderer, VegaRenderer + + plots_data = PlotsData(data) renderers = [] - for filename, group in group_by_filename(plots_data).items(): - plot_properties = squash_plots_properties(group) - for renderer_class in RENDERERS: - if renderer_class.matches(filename, plot_properties): - if out is not None: - plot_properties["out"] = out - if templates_dir is not None: - plot_properties["template_dir"] = templates_dir - datapoints, plot_properties = to_datapoints( - renderer_class, group, plot_properties - ) - renderers.append( - renderer_class(datapoints, filename, **plot_properties) + renderer_cls = None + for plot_id, group in plots_data.group_definitions().items(): + plot_datapoints: List[Dict] = [] + props = _squash_plots_properties(group) + final_props: Dict = {} + + if out is not None: + props["out"] = out + if templates_dir is not None: + props["template_dir"] = templates_dir + + for rev, inner_id, plot_definition in group: + plot_sources = infer_data_sources(inner_id, plot_definition) + definitions_data = plots_data.get_definition_data( + plot_sources, rev + ) + + if ImageRenderer.matches(inner_id, None): + renderer_cls = ImageRenderer + renderer_id = inner_id + else: + renderer_cls = VegaRenderer + renderer_id = plot_id + + converter = _get_converter(renderer_cls, props) + + for filename, plot_data in definitions_data: + dps, final_props = converter.convert( + revision=rev, + filename=filename, + data=plot_data, ) - break + plot_datapoints.extend(dps) + + if "title" not in final_props: + final_props["title"] = renderer_id + if renderer_cls is not None: + renderers.append( + renderer_cls(plot_datapoints, renderer_id, **final_props) + ) return renderers diff --git a/dvc/render/vega_converter.py b/dvc/render/vega_converter.py deleted file mode 100644 index 0759e34cdb..0000000000 --- a/dvc/render/vega_converter.py +++ /dev/null @@ -1,183 +0,0 @@ -from copy import deepcopy -from functools import partial -from typing import Dict, List, Optional, Set, Union - -from funcy import first, project - -from dvc.exceptions import DvcException -from dvc.render import FILENAME_FIELD, INDEX_FIELD, REVISION_FIELD - - -class FieldsNotFoundError(DvcException): - def __init__(self, expected_fields, found_fields): - expected_str = ", ".join(expected_fields) - found_str = ", ".join(found_fields) - super().__init__( - f"Could not find all provided fields ('{expected_str}') " - f"in data fields ('{found_str}')." - ) - - -class PlotDataStructureError(DvcException): - def __init__(self): - super().__init__( - "Plot data extraction failed. Please see " - "https://man.dvc.org/plots for supported data formats." - ) - - -def _filter_fields(datapoints: List[Dict], fields: Set) -> List[Dict]: - if not fields: - return datapoints - assert isinstance(fields, set) - - new_data = [] - for data_point in datapoints: - keys = set(data_point.keys()) - if not fields <= keys: - raise FieldsNotFoundError(fields, keys) - - new_data.append(project(data_point, fields)) - - return new_data - - -def _lists(dictionary: Dict): - for _, value in dictionary.items(): - if isinstance(value, dict): - yield from _lists(value) - elif isinstance(value, list): - yield value - - -def _find_first_list(data: Union[Dict, List], fields: Set) -> List[Dict]: - fields = fields or set() - - if not isinstance(data, dict): - return data - - for lst in _lists(data): - if ( - all(isinstance(dp, dict) for dp in lst) - # if fields is empty, it will match any set - and set(first(lst).keys()) & fields == fields - ): - return lst - - raise PlotDataStructureError() - - -def _append_index(datapoints: List[Dict]) -> List[Dict]: - if INDEX_FIELD in first(datapoints).keys(): - return datapoints - - for index, data_point in enumerate(datapoints): - data_point[INDEX_FIELD] = index - return datapoints - - -class VegaConverter: - """ - Class that takes care of converting unspecified data blob - (Dict or List[Dict]) into datapoints (List[Dict]). - If some properties that are required by Template class are missing - ('x', 'y') it will attempt to fill in the blanks. - """ - - def __init__(self, plot_properties: Optional[Dict] = None): - plot_properties = plot_properties or {} - self.props = deepcopy(plot_properties) - self.inferred_props: Dict = {} - - self.steps = [] - - self._infer_x() - self._infer_fields() - - self.steps.append( - ( - "find_data", - partial( - _find_first_list, - fields=self.inferred_props.get("fields", set()) - - {INDEX_FIELD}, - ), - ) - ) - - if not self.props.get("x", None): - self.steps.append(("append_index", partial(_append_index))) - - self.steps.append( - ( - "filter_fields", - partial( - _filter_fields, - fields=self.inferred_props.get("fields", set()), - ), - ) - ) - - def _infer_x(self): - if not self.props.get("x", None): - self.inferred_props["x"] = INDEX_FIELD - - def skip_step(self, name: str): - self.steps = [(_name, fn) for _name, fn in self.steps if _name != name] - - def _infer_fields(self): - fields = self.props.get("fields", set()) - if fields: - fields = { - *fields, - self.props.get("x", None), - self.props.get("y", None), - self.inferred_props.get("x", None), - } - {None} - self.inferred_props["fields"] = fields - - def _infer_y(self, datapoints: List[Dict]): - if "y" not in self.props: - data_fields = list(first(datapoints)) - skip = ( - REVISION_FIELD, - self.props.get("x", None) or self.inferred_props.get("x"), - ) - inferred_y = first( - f for f in reversed(data_fields) if f not in skip - ) - if "y" in self.inferred_props: - previous_y = self.inferred_props["y"] - if previous_y != inferred_y: - raise DvcException( - f"Inferred y ('{inferred_y}' value does not match" - f"previously matched one ('f{previous_y}')." - ) - else: - self.inferred_props["y"] = inferred_y - - def convert( - self, - data: Dict, - revision: Optional[str] = None, - filename: Optional[str] = None, - ): - """ - Convert the data. Fill necessary fields ('x', 'y') and return both - generated datapoints and updated properties. - """ - processed = deepcopy(data) - - for _, step in self.steps: - processed = step(processed) # type: ignore - - self._infer_y(processed) # type: ignore - - if revision: - for datapoint in processed: - datapoint[REVISION_FIELD] = revision - if filename: - for datapoint in processed: - datapoint[FILENAME_FIELD] = filename - - return processed, {**self.props, **self.inferred_props} diff --git a/dvc/repo/plots/__init__.py b/dvc/repo/plots/__init__.py index ec00650f5e..6ef560a26f 100644 --- a/dvc/repo/plots/__init__.py +++ b/dvc/repo/plots/__init__.py @@ -2,7 +2,8 @@ import io import logging import os -from collections import OrderedDict +from collections import OrderedDict, defaultdict +from copy import deepcopy from functools import partial from typing import ( TYPE_CHECKING, @@ -12,8 +13,11 @@ Generator, List, Optional, + Set, ) +import dpath.options +import dpath.util from funcy import cached_property, first, project from dvc.exceptions import DvcException @@ -24,6 +28,8 @@ from dvc.output import Output from dvc.repo import Repo +dpath.options.ALLOW_EMPTY_STRING_KEYS = True + logger = logging.getLogger(__name__) @@ -63,14 +69,45 @@ def collect( recursive: bool = False, onerror: Optional[Callable] = None, props: Optional[Dict] = None, + config_files: Optional[Set[str]] = None, ) -> Generator[Dict, None, None]: - """Collects all props and data for plots. + """Collects plots definitions and data sources. Generator yielding a structure like: - {rev: {plots.csv: { - props: {x: ..., "header": ..., ...}, - data: "unstructured data (as stored for given extension)", - }}} + { + revision: + { + "definitions": + { + "data": + { + "config_file": + { + "data": + { + plot_id: + { + plot_config + } + } + } + } + }, + "sources": + { + "data": + { + "filename": + { + "data_source": callable loading the data, + "props": propreties for the file if it is + plots type output + } + } + } + } + + } """ from dvc.utils.collections import ensure_list @@ -80,68 +117,61 @@ def collect( if revs is not None and rev not in revs: continue rev = rev or "workspace" - yield { - rev: self._collect_from_revision( + + res: Dict = {} + definitions = _collect_definitions( + self.repo, + targets=targets, + revision=rev, + onerror=onerror, + config_files=config_files, + props=props, + ) + if definitions: + res[rev] = {"definitions": definitions} + + data_targets = _get_data_targets(definitions) + + res[rev]["sources"] = self._collect_data_sources( revision=rev, - targets=targets, + targets=data_targets, recursive=recursive, - onerror=onerror, props=props, + onerror=onerror, ) - } + yield res @error_handler - def _collect_from_revision( + def _collect_data_sources( self, targets: Optional[List[str]] = None, revision: Optional[str] = None, recursive: bool = False, - onerror: Optional[Callable] = None, props: Optional[Dict] = None, + onerror: Optional[Callable] = None, ): from dvc.fs.dvc import DvcFileSystem fs = DvcFileSystem(repo=self.repo) + + props = props or {} + plots = _collect_plots(self.repo, targets, revision, recursive) res: Dict[str, Any] = {} for fs_path, rev_props in plots.items(): - base = os.path.join(*fs.path.relparts(fs_path, fs.fs.root_marker)) - if fs.isdir(fs_path): - plot_files = [] - unpacking_res = _unpack_dir_files(fs, fs_path, onerror=onerror) - if "data" in unpacking_res: - for pi in unpacking_res.get( # pylint: disable=E1101 - "data" - ): - plot_files.append( - ( - pi, - os.path.join( - base, *fs.path.relparts(pi, fs_path) - ), - ) - ) - else: - res[base] = unpacking_res - else: - plot_files = [(fs_path, base)] - - props = props or {} - - for path, repo_path in plot_files: - joined_props = {**rev_props, **props} - res[repo_path] = {"props": joined_props} - res[repo_path].update( - { - "data_source": partial( - parse, - fs, - path, - props=joined_props, - onerror=onerror, - ) - } - ) + joined_props = {**rev_props, **props} + res[fs_path] = {"props": joined_props} + res[fs_path].update( + { + "data_source": partial( + parse, + fs, + fs_path, + props=joined_props, + onerror=onerror, + ) + } + ) return res def show( @@ -151,21 +181,21 @@ def show( props=None, recursive=False, onerror=None, + config_files: Optional[Set[str]] = None, ): if onerror is None: onerror = onerror_collect result: Dict[str, Dict] = {} for data in self.collect( - targets, revs, recursive, onerror=onerror, props=props + targets, + revs, + recursive, + onerror=onerror, + props=props, + config_files=config_files, ): - assert len(data) == 1 - revision_data = first(data.values()) - if "data" in revision_data: - for path_data in revision_data["data"].values(): - result_source = path_data.pop("data_source", None) - if result_source: - path_data.update(result_source()) + _resolve_data_sources(data) result.update(data) errored = errored_revisions(result) @@ -236,6 +266,16 @@ def _is_plot(out: "Output") -> bool: return bool(out.plot) or bool(out.live) +def _resolve_data_sources(plots_data: Dict): + for value in plots_data.values(): + if isinstance(value, dict): + if "data_source" in value: + data_source = value.pop("data_source") + assert callable(data_source) + value.update(data_source()) + _resolve_data_sources(value) + + def _collect_plots( repo: "Repo", targets: List[str] = None, @@ -260,6 +300,203 @@ def _collect_plots( return result +def _get_data_targets(definitions: Dict): + result: Set = set() + if "data" in definitions: + for content in definitions["data"].values(): + if "data" in content: + for plot_id, config in content["data"].items(): + result = result.union(infer_data_sources(plot_id, config)) + return result + + +def infer_data_sources(plot_id, config=None): + def _deduplicate(lst: List): + return list({elem: None for elem in lst}.keys()) + + y = config.get("y", None) + if isinstance(y, dict): + sources = list(y.keys()) + else: + sources = [plot_id] + + return _deduplicate(source for source in sources) + + +def _matches(targets, config_file, plot_id): + import re + + from dvc.utils.plots import get_plot_id + + if not targets: + return True + + full_id = get_plot_id(plot_id, config_file) + if any( + (re.match(target, plot_id) or re.match(target, full_id)) + for target in targets + ): + return True + return False + + +def _dvcfile_relpath(dvcfile): + fs = dvcfile.repo.dvcfs + + # TODO from_os_path changes abs to relative + # TODO we should be using `dvcfile.relpath` - in case of GitFS (plots diff) + # and invoking from some subdir `dvcfile.relpath` returns strange long + # relative paths + # ("../../../../../../dvc.yaml") - investigate + return fs.path.relpath( + fs.path.join("/", fs.from_os_path(dvcfile.path)), fs.path.getcwd() + ) + + +def _collect_output_plots( + repo, targets, props, onerror: Optional[Callable] = None +): + fs = repo.dvcfs + result: Dict[str, Dict] = {} + for plot in repo.index.plots: + plot_props = _plot_props(plot) + dvcfile = plot.stage.dvcfile + config_path = _dvcfile_relpath(dvcfile) + config_dirname = os.path.dirname(config_path) + if _matches(targets, config_path, str(plot)): + unpacked = unpack_if_dir( + fs, + fs.path.join(config_dirname, plot.def_path), + props={**plot_props, **props}, + onerror=onerror, + ) + + dpath.util.merge( + result, + {"": unpacked}, + ) + return result + + +def _adjust_definitions_to_cwd(fs, config_relpath, plots_definitions): + # TODO normopath normalizes to windows path on Windows + # investigate + + import posixpath + + result = defaultdict(dict) + + config_dirname = fs.path.dirname(config_relpath) + + for plot_id, plot_def in plots_definitions.items(): + + y_def = plot_def.get("y", None) if plot_def else None + if y_def is None or not isinstance(y_def, dict): + # plot_id is filename + new_plot_id = posixpath.normpath( + fs.path.join(config_dirname, plot_id) + ) + result[new_plot_id] = plot_def or {} + else: + new_plot_def = deepcopy(plot_def) + old_y = new_plot_def.pop("y") + new_y = {} + for filepath, val in old_y.items(): + new_y[ + posixpath.normpath(fs.path.join(config_dirname, filepath)) + ] = val + new_plot_def["y"] = new_y + result[plot_id] = new_plot_def + return dict(result) + + +def _collect_pipeline_files(repo, targets: List[str], props): + from dvc.dvcfile import PipelineFile + + result: Dict[str, Dict] = {} + dvcfiles = {stage.dvcfile for stage in repo.index.stages} + for dvcfile in dvcfiles: + if isinstance(dvcfile, PipelineFile): + dvcfile_path = _dvcfile_relpath(dvcfile) + dvcfile_defs = _adjust_definitions_to_cwd( + repo.fs, dvcfile_path, dvcfile.load().get("plots", {}) + ) + for plot_id, plot_props in dvcfile_defs.items(): + if plot_props is None: + plot_props = {} + if _matches(targets, dvcfile_path, plot_id): + dpath.util.merge( + result, + { + dvcfile_path: { + "data": {plot_id: {**plot_props, **props}} + } + }, + ) + return result + + +@error_handler +def _collect_definitions( + repo: "Repo", + targets=None, + config_files: Optional[Set[str]] = None, + props: Dict = None, + onerror: Optional[Callable] = None, + **kwargs, +) -> Dict: + + result: Dict = defaultdict(dict) + props = props or {} + + from dvc.fs.dvc import DvcFileSystem + + fs = DvcFileSystem(repo=repo) + + if not config_files: + dpath.util.merge(result, _collect_pipeline_files(repo, targets, props)) + + if targets or (not targets and not config_files): + dpath.util.merge( + result, + _collect_output_plots(repo, targets, props, onerror=onerror), + ) + + if config_files: + for path in config_files: + definitions = parse(fs, path) + definitions = _adjust_definitions_to_cwd( + repo.fs, path, definitions + ) + if definitions: + dpath.util.merge(result, {path: definitions}) + + for target in targets: + if not result or fs.exists(target): + unpacked = unpack_if_dir(fs, target, props=props, onerror=onerror) + dpath.util.merge(result[""], unpacked) + + return dict(result) + + +def unpack_if_dir( + fs, path, props: Dict[str, str], onerror: Optional[Callable] = None +): + result: Dict[str, Dict] = defaultdict(dict) + if fs.isdir(path): + unpacked = _unpack_dir_files(fs, path, onerror=onerror) + else: + unpacked = {"data": [path]} + + if "data" in unpacked: + for subpath in unpacked["data"]: + result["data"].update({subpath: props}) + else: + result.update(unpacked) + + return dict(result) + + @error_handler def parse(fs, path, props=None, **kwargs): props = props or {} diff --git a/dvc/schema.py b/dvc/schema.py index ff994c5f0c..67294bda8e 100644 --- a/dvc/schema.py +++ b/dvc/schema.py @@ -55,6 +55,8 @@ } } +PLOTS = "plots" +PLOTS_SCHEMA = dict PLOT_PROPS = { Output.PARAM_PLOT_TEMPLATE: str, Output.PARAM_PLOT_X: str, @@ -120,6 +122,7 @@ def validator(data): MULTI_STAGE_SCHEMA = { STAGES: SINGLE_PIPELINE_STAGE_SCHEMA, VARS_KWD: VARS_SCHEMA, + PLOTS: PLOTS_SCHEMA, } COMPILED_SINGLE_STAGE_SCHEMA = Schema(SINGLE_STAGE_SCHEMA) diff --git a/dvc/utils/plots.py b/dvc/utils/plots.py new file mode 100644 index 0000000000..fe88a26224 --- /dev/null +++ b/dvc/utils/plots.py @@ -0,0 +1,2 @@ +def get_plot_id(plot_id: str, config_file_path: str = ""): + return f"{config_file_path}::{plot_id}" if config_file_path else plot_id diff --git a/setup.cfg b/setup.cfg index 868181e78e..bc5e71b5ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -144,6 +144,7 @@ tests = # optional dependencies pywin32>=225; sys_platform == 'win32' dvclive[image]>=0.7.3 + beautifulsoup4==4.11.1 [options.packages.find] exclude = diff --git a/tests/func/plots/test_diff.py b/tests/func/plots/test_diff.py index 739938449c..2ce8c0e3b9 100644 --- a/tests/func/plots/test_diff.py +++ b/tests/func/plots/test_diff.py @@ -1,45 +1,48 @@ +from tests.utils.plots import get_plot + + def test_diff_dirty(tmp_dir, scm, dvc, run_copy_metrics): (tmp_dir / "metric_t.json").dump([{"y": 2}, {"y": 3}], sort_keys=True) run_copy_metrics( "metric_t.json", "metric.json", - plots_no_cache=["metric.json"], + plots=["metric.json"], + name="train", commit="init", ) metric_head = [{"y": 3}, {"y": 5}] (tmp_dir / "metric_t.json").dump_json(metric_head, sort_keys=True) - run_copy_metrics( - "metric_t.json", - "metric.json", - plots_no_cache=["metric.json"], - commit="second", - ) + dvc.reproduce() + scm.add(["dvc.lock"]) + scm.commit("second") metric_1 = [{"y": 5}, {"y": 6}] (tmp_dir / "metric_t.json").dump_json(metric_1, sort_keys=True) - run_copy_metrics( - "metric_t.json", "metric.json", plots_no_cache=["metric.json"] - ) - props = {"fields": {"y"}} + dvc.reproduce() + + props = {"fields": ["y"]} diff_result = dvc.plots.diff(props=props) - assert diff_result == { - "workspace": { - "data": {"metric.json": {"data": metric_1, "props": props}} - }, - "HEAD": { - "data": {"metric.json": {"data": metric_head, "props": props}} - }, - } + + assert get_plot(diff_result, "workspace", file="metric.json") == metric_1 + assert get_plot( + diff_result, "workspace", "definitions", file="", endkey="data" + ) == {"metric.json": props} + assert get_plot(diff_result, "HEAD", file="metric.json") == metric_head + assert get_plot( + diff_result, "HEAD", "definitions", file="", endkey="data" + ) == {"metric.json": props} + metric_2 = [{"y": 7}, {"y": 8}] (tmp_dir / "metric.json").dump_json(metric_2, sort_keys=True) diff_result = dvc.plots.diff(props=props) - assert diff_result == { - "workspace": { - "data": {"metric.json": {"data": metric_2, "props": props}} - }, - "HEAD": { - "data": {"metric.json": {"data": metric_head, "props": props}} - }, - } + assert get_plot(diff_result, "workspace", file="metric.json") == metric_2 + assert get_plot( + diff_result, "workspace", "definitions", file="", endkey="data" + ) == {"metric.json": props} + + assert get_plot(diff_result, "HEAD", file="metric.json") == metric_head + assert get_plot( + diff_result, "workspace", "definitions", file="", endkey="data" + ) == {"metric.json": props} diff --git a/tests/func/plots/test_modify.py b/tests/func/plots/test_modify.py index 2cea446d54..66b71d7442 100644 --- a/tests/func/plots/test_modify.py +++ b/tests/func/plots/test_modify.py @@ -1,10 +1,9 @@ -import os - import pytest from dvc.dvcfile import PIPELINE_LOCK from dvc.repo.plots import PropsNotFoundError from dvc.utils import relpath +from tests.utils.plots import get_plot def test_plots_modify_existing_template( @@ -78,8 +77,8 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): fname = "file.json" (tmp_dir / fname).dump_json(metric, sort_keys=True) - p1 = os.path.join("subdir", "p1.json") - p2 = os.path.join("subdir", "p2.json") + p1 = "subdir/p1.json" + p2 = "subdir/p2.json" tmp_dir.dvc.run( cmd=( f"mkdir subdir && python copy.py {fname} {p1} && " @@ -93,5 +92,7 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): dvc.plots.modify("subdir", {"title": "TITLE"}) result = dvc.plots.show() - assert result["workspace"]["data"][p1]["props"]["title"] == "TITLE" - assert result["workspace"]["data"][p2]["props"]["title"] == "TITLE" + assert get_plot(result, "workspace", typ="definitions", file="") == { + p1: {"title": "TITLE"}, + p2: {"title": "TITLE"}, + } diff --git a/tests/func/plots/test_show.py b/tests/func/plots/test_show.py index 4b5fc79457..7d2bf078da 100644 --- a/tests/func/plots/test_show.py +++ b/tests/func/plots/test_show.py @@ -1,7 +1,6 @@ import os import pytest -from funcy import get_in from dvc.cli import main from dvc.dvcfile import PIPELINE_FILE @@ -11,6 +10,7 @@ from dvc.utils import onerror_collect from dvc.utils.fs import remove from dvc.utils.serialize import EncodingError, YAMLFileCorruptedError +from tests.utils.plots import get_plot def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): @@ -38,9 +38,11 @@ def test_plot_cache_missing(tmp_dir, scm, dvc, caplog, run_copy_metrics): remove(stage.outs[0].cache_path) plots_data = dvc.plots.show(revs=["v1", "v2"], targets=["metric.json"]) - assert plots_data["v1"]["data"]["metric.json"]["data"] == metric1 + + assert get_plot(plots_data, "v1", file="metric.json") == metric1 assert isinstance( - plots_data["v2"]["data"]["metric.json"]["error"], FileNotFoundError + get_plot(plots_data, "v2", file="metric.json", endkey="error"), + FileNotFoundError, ) @@ -53,10 +55,9 @@ def test_plot_wrong_metric_type(tmp_dir, scm, dvc, run_copy_metrics): commit="add text metric", ) + result = dvc.plots.show(targets=["metric.txt"], onerror=onerror_collect) assert isinstance( - dvc.plots.show(targets=["metric.txt"], onerror=onerror_collect)[ - "workspace" - ]["data"]["metric.txt"]["error"], + get_plot(result, "workspace", file="metric.txt", endkey="error"), PlotMetricTypeError, ) @@ -73,7 +74,7 @@ def test_show_non_plot(tmp_dir, scm, use_dvc): plots = dvc.plots.show(targets=["metric.json"]) - assert plots["workspace"]["data"]["metric.json"]["data"] == metric + assert get_plot(plots, "workspace", file="metric.json") == metric def test_show_non_plot_and_plot_with_params( @@ -82,15 +83,22 @@ def test_show_non_plot_and_plot_with_params( metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] (tmp_dir / "metric.json").dump_json(metric, sort_keys=True) run_copy_metrics( - "metric.json", "metric2.json", plots_no_cache=["metric2.json"] + "metric.json", + "metric2.json", + plots_no_cache=["metric2.json"], + name="train", ) props = {"title": "TITLE"} dvc.plots.modify("metric2.json", props=props) result = dvc.plots.show(targets=["metric.json", "metric2.json"]) - assert "metric.json" in result["workspace"]["data"] - assert "metric2.json" in result["workspace"]["data"] - assert result["workspace"]["data"]["metric2.json"]["props"] == props + + assert get_plot(result, "workspace", file="metric.json") == metric + assert get_plot(result, "workspace", file="metric2.json") == metric + assert ( + get_plot(result, "workspace", file="metric2.json", endkey="props") + == props + ) def test_show_from_subdir(tmp_dir, dvc, capsys): @@ -112,7 +120,8 @@ def test_show_from_subdir(tmp_dir, dvc, capsys): def test_plots_show_non_existing(tmp_dir, dvc, caplog): result = dvc.plots.show(targets=["plot.json"]) assert isinstance( - result["workspace"]["data"]["plot.json"]["error"], FileNotFoundError + get_plot(result, "workspace", file="plot.json", endkey="error"), + FileNotFoundError, ) assert "'plot.json' was not found in current workspace." in caplog.text @@ -147,8 +156,9 @@ def test_plots_show_overlap(tmp_dir, dvc, run_copy_metrics, clear_before_run): dvc._reset() + result = dvc.plots.show(onerror=onerror_collect) assert isinstance( - dvc.plots.show(onerror=onerror_collect)["workspace"]["error"], + get_plot(result, "workspace", endkey="error"), OverlappingOutputPathsError, ) @@ -162,8 +172,8 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): fname = "file.json" (tmp_dir / fname).dump_json(metric, sort_keys=True) - p1 = os.path.join("subdir", "p1.json") - p2 = os.path.join("subdir", "p2.json") + p1 = "subdir/p1.json" + p2 = "subdir/p2.json" tmp_dir.dvc.run( cmd=( f"mkdir subdir && python copy.py {fname} {p1} && " @@ -178,9 +188,12 @@ def test_dir_plots(tmp_dir, dvc, run_copy_metrics): dvc.plots.modify("subdir", props) result = dvc.plots.show() - assert set(result["workspace"]["data"]) == {p1, p2} - assert result["workspace"]["data"][p1]["props"] == props - assert result["workspace"]["data"][p2]["props"] == props + + assert set(get_plot(result, "workspace")) == {p1, p2} + assert get_plot(result, "workspace", typ="definitions", file="") == { + p1: props, + p2: props, + } def test_ignore_parsing_error(tmp_dir, dvc, run_copy_metrics): @@ -191,19 +204,23 @@ def test_ignore_parsing_error(tmp_dir, dvc, run_copy_metrics): result = dvc.plots.show(onerror=onerror_collect) assert isinstance( - result["workspace"]["data"]["plot_file.json"]["error"], EncodingError + get_plot(result, "workspace", file="plot_file.json", endkey="error"), + EncodingError, ) @pytest.mark.parametrize( - "file,error_path", + "file,path_kwargs", ( - (PIPELINE_FILE, ["workspace", "error"]), - ("plot.yaml", ["workspace", "data", "plot.yaml", "error"]), + (PIPELINE_FILE, {"revision": "workspace", "endkey": "error"}), + ( + "plot.yaml", + {"revision": "workspace", "file": "plot.yaml", "endkey": "error"}, + ), ), ) def test_log_errors( - tmp_dir, scm, dvc, run_copy_metrics, file, error_path, capsys + tmp_dir, scm, dvc, run_copy_metrics, file, path_kwargs, capsys ): metric = [{"val": 2}, {"val": 3}] (tmp_dir / "metric_t.yaml").dump(metric) @@ -222,7 +239,7 @@ def test_log_errors( result = dvc.plots.show(onerror=onerror_collect) _, error = capsys.readouterr() - assert isinstance(get_in(result, error_path), YAMLFileCorruptedError) + assert isinstance(get_plot(result, **path_kwargs), YAMLFileCorruptedError) assert ( "DVC failed to load some plots for following revisions: 'workspace'." in error @@ -252,8 +269,8 @@ def test_plots_binary(tmp_dir, scm, dvc, run_copy_metrics, custom_template): fd.write(b"content2") result = dvc.plots.show(revs=["v1", "workspace"]) - assert result["v1"]["data"]["plot.jpg"]["data"] == b"content" - assert result["workspace"]["data"]["plot.jpg"]["data"] == b"content2" + assert get_plot(result, "v1", file="plot.jpg") == b"content" + assert get_plot(result, "workspace", file="plot.jpg") == b"content2" def test_collect_non_existing_dir(tmp_dir, dvc, run_copy_metrics): @@ -293,6 +310,91 @@ def test_collect_non_existing_dir(tmp_dir, dvc, run_copy_metrics): remove(subdir_stage.outs[0].fs_path) result = dvc.plots.show() - assert "error" in result["workspace"]["data"]["subdir"] + assert get_plot( + result, "workspace", typ="definitions", file="", endkey="error" + ) # make sure others gets loaded - assert result["workspace"]["data"]["plot.json"]["data"] == metric + assert get_plot(result, "workspace", file="plot.json") == metric + + +@pytest.mark.parametrize( + "plot_config,expected_datafiles", + [ + # TODO - enable providing data files for x + # ( + # { + # "comparison": { + # "x": {"data1.json": "a"}, + # "y": {"sub/dir/data2.json": "b"}, + # } + # }, + # ["data1.json", os.path.join("sub", "dir", "data2.json")], + # ), + ( + {"data1.json": {"x": "c", "y": "a", "title": "File as key test"}}, + ["data1.json"], + ), + ( + { + "infer_data_from_y": { + "x": "a", + "y": {"data1.json": "b", "sub/dir/data2.json": "c"}, + } + }, + ["data1.json", os.path.join("sub", "dir", "data2.json")], + ), + ], +) +@pytest.mark.parametrize("separate_config", [True, False]) +def test_load_from_config( + tmp_dir, + dvc, + plot_config, + expected_datafiles, + separate_config, + run_copy_metrics, +): + data = { + "data1.json": [ + {"a": 1, "b": 0.1, "c": 0.01}, + {"a": 2, "b": 0.2, "c": 0.02}, + ], + os.path.join("sub", "dir", "data.json"): [ + {"a": 6, "b": 0.6, "c": 0.06}, + {"a": 7, "b": 0.7, "c": 0.07}, + ], + } + + for filename, content in data.items(): + dirname = os.path.dirname(filename) + if dirname: + os.makedirs(dirname) + (tmp_dir / filename).dump_json(content, sort_keys=True) + + config_files = None + if separate_config: + (tmp_dir / "plot_config.json").dump_json(plot_config, sort_keys=True) + config_file = "plot_config.json" + config_files = {config_file} + else: + # TODO we need that to create any stage, as dvc.yaml plots + # collections bases on existing stages - fix collection + run_copy_metrics("data1.json", "copy.json", name="train") + + from dvc.utils.serialize import modify_yaml + + config_file = "dvc.yaml" + with modify_yaml(config_file) as dvcfile_content: + dvcfile_content["plots"] = plot_config + + result = dvc.plots.show(config_files=config_files) + + assert plot_config == get_plot( + result, "workspace", typ="definitions", file=config_file + ) + + for filename, content in data.items(): + if filename in expected_datafiles: + assert content == get_plot(result, "workspace", file=filename) + else: + assert filename not in get_plot(result, "workspace") diff --git a/tests/func/test_live.py b/tests/func/test_live.py index 8bc31a9c60..53ee3c6a49 100644 --- a/tests/func/test_live.py +++ b/tests/func/test_live.py @@ -1,4 +1,3 @@ -import os from copy import deepcopy from textwrap import dedent @@ -121,11 +120,11 @@ def test_live_provides_metrics(tmp_dir, dvc, live_stage): assert (tmp_dir / "logs").is_dir() plots_data = dvc.plots.show() - files = list(plots_data["workspace"]["data"]) - assert os.path.join("logs", "scalars", "accuracy.tsv") in files - assert os.path.join("logs", "scalars", "loss.tsv") in files - assert os.path.join("logs", "images", "0", "image.jpg") in files - assert os.path.join("logs", "images", "1", "image.jpg") in files + files = list(plots_data["workspace"]["sources"]["data"]) + assert "logs/scalars/accuracy.tsv" in files + assert "logs/scalars/loss.tsv" in files + assert "logs/images/0/image.jpg" in files + assert "logs/images/1/image.jpg" in files @pytest.mark.parametrize("typ", ("live", "live_no_cache")) diff --git a/tests/integration/plots/conftest.py b/tests/integration/plots/conftest.py index 8b607d5663..252107eaa1 100644 --- a/tests/integration/plots/conftest.py +++ b/tests/integration/plots/conftest.py @@ -40,6 +40,8 @@ def make(): plots=["confusion.json"], commit="confusion", ) + linear_props = {"title": "linear", "x": "x"} + dvc.plots.modify("linear.json", linear_props) confusion_props = { "title": "confusion matrix", "x": "predicted", @@ -79,3 +81,109 @@ def make(): yield image_v2, linear_v2, confusion_v2, confusion_props return make + + +@pytest.fixture +def repo_with_config_plots(tmp_dir, scm, dvc, run_copy_metrics): + def make(): + linear_train_v1 = [ + {"x": 1, "y": 0.1}, + {"x": 2, "y": 0.2}, + {"x": 3, "y": 0.3}, + ] + linear_test_v1 = [ + {"x": 1, "y": 0.2}, + {"x": 2, "y": 0.3}, + {"x": 3, "y": 0.4}, + ] + + confusion_train_v1 = [ + {"actual": 0, "predicted": 1}, + {"actual": 0, "predicted": 1}, + {"actual": 1, "predicted": 0}, + {"actual": 1, "predicted": 0}, + ] + confusion_test_v1 = [ + {"actual": 0, "predicted": 1}, + {"actual": 0, "predicted": 0}, + {"actual": 1, "predicted": 1}, + {"actual": 1, "predicted": 0}, + ] + + (tmp_dir / "linear_train_src.json").dump_json(linear_train_v1) + (tmp_dir / "linear_test_src.json").dump_json(linear_test_v1) + (tmp_dir / "confusion_train_src.json").dump_json(confusion_train_v1) + (tmp_dir / "confusion_test_src.json").dump_json(confusion_test_v1) + + scm.add( + [ + "linear_train_src.json", + "linear_test_src.json", + "confusion_train_src.json", + "confusion_test_src.json", + ] + ) + scm.commit("add data sources") + + run_copy_metrics( + "linear_train_src.json", + "linear_train.json", + name="linear_train", + outs=["linear_train.json"], + commit="linear_train", + ) + run_copy_metrics( + "linear_test_src.json", + "linear_test.json", + name="linear_test", + outs=["linear_test.json"], + commit="linear_test", + ) + run_copy_metrics( + "confusion_train_src.json", + "confusion_train.json", + name="confusion_train", + outs=["confusion_train.json"], + commit="confusion_train", + ) + run_copy_metrics( + "confusion_test_src.json", + "confusion_test.json", + name="confusion_test", + outs=["confusion_test.json"], + commit="confusion_test", + ) + plots_config = { + "linear_train_vs_test": { + "x": "x", + "y": {"linear_train.json": "y", "linear_test.json": "y"}, + "title": "linear plot", + }, + "confusion_train_vs_test": { + "x": "actual", + "y": { + "confusion_train.json": "predicted", + "confusion_test.json": "predicted", + }, + "template": "confusion", + }, + } + + from dvc.utils.serialize import modify_yaml + + with modify_yaml("dvc.yaml") as dvcfile_content: + dvcfile_content["plots"] = plots_config + + scm.add(["dvc.yaml", "dvc.lock"]) + scm.commit("commit dvc files") + yield { + "data": { + "linear_train.json": linear_train_v1, + "linear_test.json": linear_test_v1, + "confusion_train.json": confusion_train_v1, + "confusion_test.json": confusion_test_v1, + }, + "configs": {"dvc.yaml": plots_config}, + } + + return make diff --git a/tests/integration/plots/test_json.py b/tests/integration/plots/test_json.py deleted file mode 100644 index 75b7802b12..0000000000 --- a/tests/integration/plots/test_json.py +++ /dev/null @@ -1,281 +0,0 @@ -import json -import os -from copy import deepcopy -from typing import Dict, List -from urllib.parse import urlparse -from urllib.request import url2pathname - -import dpath.util -import pytest -from funcy import first - -from dvc.cli import main - -JSON_OUT = "vis_data" - - -def call(capsys, subcommand="show"): - capsys.readouterr() - assert ( - main(["plots", subcommand, "--json", "-o", JSON_OUT, "--split"]) == 0 - ) - split_json_out, _ = capsys.readouterr() - - split_json_result = json.loads(split_json_out) - - capsys.readouterr() - assert main(["plots", subcommand, "--json", "-o", JSON_OUT]) == 0 - json_out, _ = capsys.readouterr() - - json_result = json.loads(json_out) - - assert main(["plots", subcommand]) == 0 - html_path_out, _ = capsys.readouterr() - - parsed = urlparse(html_path_out.strip()) - abspath = url2pathname(parsed.path) - return abspath, json_result, split_json_result - - -def filter_fields(datapoints: List[Dict], fields: List[str]): - - tmp = deepcopy(datapoints) - for datapoint in tmp: - keys = set(datapoint.keys()) - for key in keys: - if key not in fields: - datapoint.pop(key) - return tmp - - -def verify_image(tmp_dir, version, filename, content, html_path, json_result): - assert os.path.exists(html_path) - with open(html_path, encoding="utf-8") as fd: - html_content = fd.read() - - image_data = {} - for datapoint in json_result[filename]: - if datapoint["revisions"] == [version]: - image_data = datapoint - break - assert image_data, f"{version} data for {filename} was not found" - assert image_data["type"] == "image" - assert image_data["url"] == str( - tmp_dir / JSON_OUT / f"{version}_{filename}" - ) - assert ( - tmp_dir / JSON_OUT / f"{version}_{filename}" - ).read_bytes() == content - - assert os.path.join("static", f"{version}_{filename}") in html_content - - # there should be no absolute paths in produced HTML - # TODO uncomment once dvc-render is adjusted - # assert str(tmp_dir) not in html_content - assert ( - tmp_dir / "dvc_plots" / "static" / f"{version}_{filename}" - ).read_bytes() == content - - -def _remove_blanks(text: str): - return text.replace("\t", "").replace("\n", "").replace(" ", "") - - -def verify_vega( - versions, - filename, - fields, - expected_data, - html_path, - json_result, - split_json_result, -): - - assert os.path.exists(html_path) - with open(html_path, encoding="utf-8") as fd: - html_content = fd.read() - assert _remove_blanks( - json.dumps(dpath.util.get(json_result, [filename, 0, "content"])) - ) in _remove_blanks(html_content) - - if isinstance(versions, str): - versions = [versions] - - for j in [json_result, split_json_result]: - assert len(j[filename]) == 1 - assert dpath.util.get(j, [filename, 0, "type"]) == "vega" - assert dpath.util.get(j, [filename, 0, "revisions"]) == versions - - assert dpath.util.get( - json_result, [filename, 0, "datapoints"] - ) == dpath.util.get(split_json_result, [filename, 0, "datapoints"]) - assert set(versions) == set( - dpath.util.get(json_result, [filename, 0, "datapoints"]).keys() - ) - - assert ( - filter_fields( - dpath.util.get( - json_result, [filename, 0, "content", "data", "values"] - ), - fields, - ) - == expected_data - ) - - assert ( - dpath.util.get( - split_json_result, [filename, 0, "content", "data", "values"] - ) - == "" - ) - - # if data is not considered, json and split json should be identical - json_content = deepcopy( - dpath.util.get(json_result, [filename, 0, "content"]) - ) - split_json_content = deepcopy( - dpath.util.get(split_json_result, [filename, 0, "content"]) - ) - dpath.util.set(json_content, ["data"], {}) - dpath.util.set(split_json_content, ["data"], {}) - assert json_content == split_json_content - - -def verify_vega_props(filename, json_result, title, x, y, **kwargs): - data = json_result[filename] - assert len(data) == 1 - data = first(data) - - assert dpath.util.get(data, ["content", "title"]) == title - - assert ( - dpath.util.get(data, ["content", "spec", "encoding", "x", "field"]) - == x - ) - assert ( - dpath.util.get(data, ["content", "spec", "encoding", "y", "field"]) - == y - ) - - -@pytest.mark.vscode -def test_no_plots(tmp_dir, scm, dvc, capsys): - html_path, json_result, split_json_result = call(capsys) - assert os.path.exists(html_path) - assert json_result == {} - assert split_json_result == {} - - -@pytest.mark.vscode -def test_repo_with_plots( - tmp_dir, scm, dvc, capsys, run_copy_metrics, repo_with_plots -): - repo_state = repo_with_plots() - - image_v1, linear_v1, confusion_v1, confusion_props = next(repo_state) - - html_path, json_result, split_json_result = call(capsys) - - verify_image( - tmp_dir, "workspace", "image.png", image_v1, html_path, json_result - ) - - verify_vega( - "workspace", - "linear.json", - ["x", "y"], - linear_v1, - html_path, - json_result, - split_json_result, - ) - - verify_vega( - "workspace", - "confusion.json", - ["actual", "predicted"], - confusion_v1, - html_path, - json_result, - split_json_result, - ) - verify_vega_props("confusion.json", json_result, **confusion_props) - - image_v2, linear_v2, confusion_v2, confusion_props = next(repo_state) - - html_path, json_result, split_json_result = call(capsys, subcommand="diff") - - verify_image( - tmp_dir, "workspace", "image.png", image_v2, html_path, json_result - ) - verify_image( - tmp_dir, "HEAD", "image.png", image_v1, html_path, json_result - ) - - verify_vega( - ["HEAD", "workspace"], - "linear.json", - ["x", "y"], - linear_v2 + linear_v1, - html_path, - json_result, - split_json_result, - ) - - verify_vega( - ["HEAD", "workspace"], - "confusion.json", - ["actual", "predicted"], - confusion_v2 + confusion_v1, - html_path, - json_result, - split_json_result, - ) - verify_vega_props("confusion.json", json_result, **confusion_props) - - -@pytest.mark.vscode -def test_repo_with_removed_plots(tmp_dir, capsys, repo_with_plots): - from dvc.utils.fs import remove - - next(repo_with_plots()) - - # even if there is no data, call should be successful - remove(tmp_dir / ".dvc" / "cache") - remove("linear.json") - remove("confusion.json") - remove("image.png") - - for s in {"show", "diff"}: - _, json_result, split_json_result = call(capsys, subcommand=s) - for p in {"linear.json", "confusion.json", "image.png"}: - assert json_result[p] == [] - assert split_json_result[p] == [] - - -def test_config_output_dir(tmp_dir, dvc, capsys): - subdir = tmp_dir / "subdir" - ret = main(["config", "plots.out_dir", os.fspath(subdir)]) - assert ret == 0 - - metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] - (tmp_dir / "metric.json").dump_json(metric, sort_keys=True) - - assert main(["plots", "show", "metric.json"]) == 0 - - out, _ = capsys.readouterr() - assert subdir.as_uri() in out - assert subdir.is_dir() - assert (subdir / "index.html").is_file() - - cli_arg_subdir = tmp_dir / "cli_option" - assert ( - main(["plots", "show", "-o", os.fspath(cli_arg_subdir), "metric.json"]) - == 0 - ) - - out, _ = capsys.readouterr() - assert cli_arg_subdir.as_uri() in out - assert cli_arg_subdir.is_dir() - assert (cli_arg_subdir / "index.html").is_file() diff --git a/tests/integration/plots/test_plots.py b/tests/integration/plots/test_plots.py new file mode 100644 index 0000000000..21d138ab43 --- /dev/null +++ b/tests/integration/plots/test_plots.py @@ -0,0 +1,411 @@ +import json +import os +from copy import deepcopy +from typing import Dict, List +from urllib.parse import urlparse +from urllib.request import url2pathname + +import dpath.util +import pytest +from bs4 import BeautifulSoup +from funcy import first + +from dvc.cli import main + +JSON_OUT = "vis_data" + + +def call(capsys, subcommand="show"): + capsys.readouterr() + assert ( + main(["plots", subcommand, "--json", "-o", JSON_OUT, "--split"]) == 0 + ) + split_json_out, _ = capsys.readouterr() + + split_json_result = json.loads(split_json_out) + + capsys.readouterr() + assert main(["plots", subcommand, "--json", "-o", JSON_OUT]) == 0 + json_out, _ = capsys.readouterr() + + json_result = json.loads(json_out) + + assert main(["plots", subcommand]) == 0 + html_path_out, _ = capsys.readouterr() + + parsed = urlparse(html_path_out.strip()) + abspath = url2pathname(parsed.path) + return abspath, json_result, split_json_result + + +def extract_vega_specs(html_path, plots_ids): + from dvc_render.base import Renderer + + result = {} + + with open(html_path, "r", encoding="utf-8") as fd: + content = fd.read() + + reader = BeautifulSoup(content, features="html.parser") + for plot_id in plots_ids: + clean_id = Renderer.remove_special_chars(plot_id) + div_id = f"plot_{clean_id}" + script = _remove_blanks(reader.find("div", id=div_id).script.text) + result[plot_id] = json.loads( + script.split("; vegaEmbed")[0].replace("var spec = ", "") + ) + + return result + + +def filter_fields(datapoints: List[Dict], fields: List[str]): + + tmp = deepcopy(datapoints) + for datapoint in tmp: + keys = set(datapoint.keys()) + for key in keys: + if key not in fields: + datapoint.pop(key) + return tmp + + +def drop_fields(datapoints: List[Dict], fields: List[str]): + tmp = deepcopy(datapoints) + for datapoint in tmp: + keys = set(datapoint.keys()) + for key in keys: + if key in fields: + datapoint.pop(key) + return tmp + + +def verify_image(path, version, filename, content, html_path, json_result): + assert os.path.exists(html_path) + with open(html_path, encoding="utf-8") as fd: + html_content = fd.read() + + image_data = {} + for datapoint in json_result[filename]: + if datapoint["revisions"] == [version]: + image_data = datapoint + break + + assert image_data, f"{version} data for {filename} was not found" + assert image_data["type"] == "image" + output_filename = filename.replace("/", "_") + output_name = f"{version}_{output_filename}" + assert image_data["url"] == str(path / JSON_OUT / output_name) + assert (path / JSON_OUT / output_name).read_bytes() == content + + assert os.path.join("static", output_name) in html_content + + # there should be no absolute paths in produced HTML + # TODO uncomment once dvc-render is adjusted + # assert str(path) not in html_content + assert ( + path / "dvc_plots" / "static" / output_name + ).read_bytes() == content + + +def _remove_blanks(text: str): + return " ".join(text.replace("\t", "").replace("\n", "").split()) + + +class hashabledict(dict): + def __hash__(self): + return hash(tuple(sorted(self.items()))) + + +def verify_vega( + versions, + html_result, + json_result, + split_json_result, +): + if isinstance(versions, str): + versions = [versions] + + for j in [json_result, split_json_result]: + assert len(j) == 1 + assert j[0]["type"] == "vega" + assert set(j[0]["revisions"]) == set(versions) + + assert json_result[0]["datapoints"] == split_json_result[0]["datapoints"] + assert set(versions) == set(json_result[0]["datapoints"].keys()) + + assert json_result[0]["content"]["data"]["values"] + assert html_result["data"]["values"] + assert ( + split_json_result[0]["content"]["data"]["values"] + == "" + ) + + def _assert_templates_equal( + html_template, filled_template, split_template + ): + # besides data, json and split json should be equal + path = ["data", "values"] + tmp1 = deepcopy(html_template) + tmp2 = deepcopy(filled_template) + tmp3 = deepcopy(split_template) + dpath.util.set(tmp1, path, {}) + dpath.util.set(tmp2, path, {}) + dpath.util.set(tmp3, path, {}) + + assert tmp1 == tmp2 == tmp3 + + _assert_templates_equal( + html_result, json_result[0]["content"], split_json_result[0]["content"] + ) + + +def verify_vega_props(plot_id, json_result, title, x, y, **kwargs): + data = json_result[plot_id] + assert len(data) == 1 + data = first(data) + + assert dpath.util.get(data, ["content", "title"]) == title + + try: + # TODO confusion_matrix_plot - need to find better way of asserting + # encoding as its place is not constant in vega + plot_x = dpath.util.get( + data, ["content", "spec", "encoding", "x", "field"] + ) + plot_y = dpath.util.get( + data, ["content", "spec", "encoding", "y", "field"] + ) + except KeyError: + # default plot + plot_x = dpath.util.get( + data, ["content", "layer", 0, "encoding", "x", "field"] + ) + plot_y = dpath.util.get( + data, ["content", "layer", 0, "encoding", "y", "field"] + ) + + assert plot_x == x + assert plot_y == y + + +def _update_datapoints(datapoints: List, update: Dict): + result = [] + for dp in datapoints: + tmp = dp.copy() + tmp.update(update) + result.append(tmp) + return result + + +@pytest.mark.vscode +def test_no_plots(tmp_dir, scm, dvc, capsys): + html_path, json_result, split_json_result = call(capsys) + assert not os.path.exists(html_path) + assert json_result == {} + assert split_json_result == {} + + +@pytest.mark.vscode +def test_repo_with_plots( + tmp_dir, scm, dvc, capsys, run_copy_metrics, repo_with_plots +): + repo_state = repo_with_plots() + + image_v1, linear_v1, confusion_v1, confusion_props = next(repo_state) + + html_path, json_result, split_json_result = call(capsys) + html_result = extract_vega_specs( + html_path, ["linear.json", "confusion.json"] + ) + assert html_result["linear.json"]["data"]["values"] == _update_datapoints( + linear_v1, + { + "rev": "workspace", + }, + ) + assert html_result["confusion.json"]["data"][ + "values" + ] == _update_datapoints( + confusion_v1, + { + "rev": "workspace", + }, + ) + + verify_image( + tmp_dir, "workspace", "image.png", image_v1, html_path, json_result + ) + + for plot in ["linear.json", "confusion.json"]: + verify_vega( + "workspace", + html_result[plot], + json_result[plot], + split_json_result[plot], + ) + + verify_vega_props("confusion.json", json_result, **confusion_props) + + image_v2, linear_v2, confusion_v2, confusion_props = next(repo_state) + + html_path, json_result, split_json_result = call(capsys, subcommand="diff") + html_result = extract_vega_specs( + html_path, ["linear.json", "confusion.json"] + ) + + verify_image( + tmp_dir, "workspace", "image.png", image_v2, html_path, json_result + ) + verify_image( + tmp_dir, "HEAD", "image.png", image_v1, html_path, json_result + ) + + for plot in ["linear.json", "confusion.json"]: + verify_vega( + ["HEAD", "workspace"], + html_result[plot], + json_result[plot], + split_json_result[plot], + ) + verify_vega_props("confusion.json", json_result, **confusion_props) + path = tmp_dir / "subdir" + path.mkdir() + with path.chdir(): + html_path, json_result, split_json_result = call( + capsys, subcommand="diff" + ) + html_result = extract_vega_specs( + html_path, + ["../linear.json", "../confusion.json"], + ) + assert html_result["../linear.json"]["data"][ + "values" + ] == _update_datapoints( + linear_v2, + { + "rev": "workspace", + }, + ) + _update_datapoints( + linear_v1, + { + "rev": "HEAD", + }, + ) + assert html_result["../confusion.json"]["data"][ + "values" + ] == _update_datapoints( + confusion_v2, + { + "rev": "workspace", + }, + ) + _update_datapoints( + confusion_v1, + { + "rev": "HEAD", + }, + ) + + for plot in [ + "../linear.json", + "../confusion.json", + ]: + verify_vega( + ["HEAD", "workspace"], + html_result[plot], + json_result[plot], + split_json_result[plot], + ) + verify_image( + path, + "workspace", + "../image.png", + image_v2, + html_path, + json_result, + ) + verify_image( + path, + "HEAD", + "../image.png", + image_v1, + html_path, + json_result, + ) + + +@pytest.mark.vscode +def test_repo_with_removed_plots(tmp_dir, capsys, repo_with_plots): + from dvc.utils.fs import remove + + next(repo_with_plots()) + + # even if there is no data, call should be successful + remove(tmp_dir / ".dvc" / "cache") + remove("linear.json") + remove("confusion.json") + remove("image.png") + + for s in {"show", "diff"}: + _, json_result, split_json_result = call(capsys, subcommand=s) + for p in { + "linear.json", + "confusion.json", + "image.png", + }: + assert json_result[p] == [] + assert split_json_result[p] == [] + + +def test_config_output_dir(tmp_dir, dvc, capsys): + subdir = tmp_dir / "subdir" + ret = main(["config", "plots.out_dir", os.fspath(subdir)]) + assert ret == 0 + + metric = [{"first_val": 100, "val": 2}, {"first_val": 200, "val": 3}] + (tmp_dir / "metric.json").dump_json(metric, sort_keys=True) + + assert main(["plots", "show", "metric.json"]) == 0 + + out, _ = capsys.readouterr() + assert subdir.as_uri() in out + assert subdir.is_dir() + assert (subdir / "index.html").is_file() + + cli_arg_subdir = tmp_dir / "cli_option" + assert ( + main(["plots", "show", "-o", os.fspath(cli_arg_subdir), "metric.json"]) + == 0 + ) + + out, _ = capsys.readouterr() + assert cli_arg_subdir.as_uri() in out + assert cli_arg_subdir.is_dir() + assert (cli_arg_subdir / "index.html").is_file() + + +@pytest.mark.vscode +def test_repo_with_config_plots(tmp_dir, capsys, repo_with_config_plots): + repo_state = repo_with_config_plots() + plots = next(repo_state) + + html_path, json_result, split_json_result = call(capsys) + + assert os.path.exists(html_path) + assert json_result == {} + assert split_json_result == {} + html_result = extract_vega_specs( + html_path, + [ + "dvc.yaml::linear_train_vs_test", + "dvc.yaml::confusion_train_vs_test", + ], + ) + assert html_result["dvc.yaml::linear_train_vs_test"]["data"][ + "values" + ] == _update_datapoints( + plots["data"]["linear_train.json"], {"rev": "linear_train.json"} + ) + _update_datapoints( + plots["data"]["linear_test.json"], {"rev": "linear_test.json"} + ) + + # TODO check json results once vscode is able to handle flexible plots diff --git a/tests/integration/plots/test_repo_plots_api.py b/tests/integration/plots/test_repo_plots_api.py index 24fcdd9fcc..f7b0064a9d 100644 --- a/tests/integration/plots/test_repo_plots_api.py +++ b/tests/integration/plots/test_repo_plots_api.py @@ -1,6 +1,7 @@ -import dpath.util import pytest +from tests.utils.plots import get_plot + @pytest.mark.studio def test_api(tmp_dir, dvc, repo_with_plots): @@ -10,37 +11,35 @@ def test_api(tmp_dir, dvc, repo_with_plots): workspace_data = next(dvc.plots.collect()) assert ( - dpath.util.get( - workspace_data, ["workspace", "data", "image.png", "props"] - ) + get_plot(workspace_data, "workspace", file="image.png", endkey="props") == {} ) - image_source = dpath.util.get( - workspace_data, ["workspace", "data", "image.png", "data_source"] + image_source = get_plot( + workspace_data, "workspace", file="image.png", endkey="data_source" ) assert callable(image_source) assert image_source() == {"data": image_v1} - assert ( - dpath.util.get( - workspace_data, ["workspace", "data", "linear.json", "props"] - ) - == {} - ) - linear_source = dpath.util.get( - workspace_data, ["workspace", "data", "linear.json", "data_source"] + assert get_plot( + workspace_data, "workspace", file="linear.json", endkey="props" + ) == {"title": "linear", "x": "x"} + linear_source = get_plot( + workspace_data, "workspace", file="linear.json", endkey="data_source" ) assert callable(linear_source) assert linear_source() == {"data": linear_v1} assert ( - dpath.util.get( - workspace_data, ["workspace", "data", "confusion.json", "props"] + get_plot( + workspace_data, "workspace", file="confusion.json", endkey="props" ) == confusion_params ) - confusion_source = dpath.util.get( - workspace_data, ["workspace", "data", "confusion.json", "data_source"] + confusion_source = get_plot( + workspace_data, + "workspace", + file="confusion.json", + endkey="data_source", ) assert callable(confusion_source) assert confusion_source() == {"data": confusion_v1} @@ -51,68 +50,84 @@ def test_api(tmp_dir, dvc, repo_with_plots): workspace_data = next(data_generator) assert ( - dpath.util.get( - workspace_data, ["workspace", "data", "image.png", "props"] - ) + get_plot(workspace_data, "workspace", file="image.png", endkey="props") == {} ) - image_source = dpath.util.get( - workspace_data, ["workspace", "data", "image.png", "data_source"] + image_source = get_plot( + workspace_data, "workspace", file="image.png", endkey="data_source" ) assert callable(image_source) assert image_source() == {"data": image_v2} - assert ( - dpath.util.get( - workspace_data, ["workspace", "data", "linear.json", "props"] - ) - == {} - ) - linear_source = dpath.util.get( - workspace_data, ["workspace", "data", "linear.json", "data_source"] + assert get_plot( + workspace_data, "workspace", file="linear.json", endkey="props" + ) == {"title": "linear", "x": "x"} + linear_source = get_plot( + workspace_data, "workspace", file="linear.json", endkey="data_source" ) assert callable(linear_source) assert linear_source() == {"data": linear_v2} assert ( - dpath.util.get( - workspace_data, ["workspace", "data", "confusion.json", "props"] + get_plot( + workspace_data, "workspace", file="confusion.json", endkey="props" ) == confusion_params ) - confusion_source = dpath.util.get( - workspace_data, ["workspace", "data", "confusion.json", "data_source"] + confusion_source = get_plot( + workspace_data, + "workspace", + file="confusion.json", + endkey="data_source", ) assert callable(confusion_source) assert confusion_source() == {"data": confusion_v2} head_data = next(data_generator) - assert ( - dpath.util.get(head_data, ["HEAD", "data", "image.png", "props"]) == {} - ) - image_source = dpath.util.get( - head_data, ["HEAD", "data", "image.png", "data_source"] + assert get_plot(head_data, "HEAD", file="image.png", endkey="props") == {} + image_source = get_plot( + head_data, "HEAD", file="image.png", endkey="data_source" ) assert callable(image_source) assert image_source() == {"data": image_v1} - assert ( - dpath.util.get(head_data, ["HEAD", "data", "linear.json", "props"]) - == {} - ) - linear_source = dpath.util.get( - head_data, ["HEAD", "data", "linear.json", "data_source"] + assert get_plot(head_data, "HEAD", file="linear.json", endkey="props") == { + "title": "linear", + "x": "x", + } + linear_source = get_plot( + head_data, "HEAD", file="linear.json", endkey="data_source" ) assert callable(linear_source) assert linear_source() == {"data": linear_v1} assert ( - dpath.util.get(head_data, ["HEAD", "data", "confusion.json", "props"]) + get_plot(head_data, "HEAD", file="confusion.json", endkey="props") == confusion_params ) - confusion_source = dpath.util.get( - head_data, ["HEAD", "data", "confusion.json", "data_source"] + confusion_source = get_plot( + head_data, "HEAD", file="confusion.json", endkey="data_source" ) assert callable(confusion_source) assert confusion_source() == {"data": confusion_v1} + + +@pytest.mark.studio +def test_api_with_config_plots(tmp_dir, dvc, capsys, repo_with_config_plots): + repo_state = repo_with_config_plots() + plots_state = next(repo_state) + + plots_data = next(dvc.plots.collect()) + + assert ( + get_plot(plots_data, "workspace", typ="definitions", file="dvc.yaml") + == plots_state["configs"]["dvc.yaml"] + ) + + for file in plots_state["data"]: + data_source = get_plot( + plots_data, "workspace", file=file, endkey="data_source" + ) + assert callable(data_source) + assert data_source() == {"data": plots_state["data"][file]} diff --git a/tests/unit/command/test_plots.py b/tests/unit/command/test_plots.py index a2430c786c..d9ba1a4f31 100644 --- a/tests/unit/command/test_plots.py +++ b/tests/unit/command/test_plots.py @@ -14,10 +14,16 @@ def plots_data(): yield { "revision": { - "data": { - "plot.csv": {"data": [{"val": 1}, {"val": 2}], "props": {}}, - "other.jpg": {"data": b"content"}, - } + "sources": { + "data": { + "plot.csv": { + "data": [{"val": 1}, {"val": 2}], + "props": {}, + }, + "other.jpg": {"data": b"content"}, + } + }, + "definitions": {"data": {"dvc.yaml": {"data": {"plot.csv": {}}}}}, } } @@ -48,6 +54,8 @@ def test_plots_diff(dvc, mocker, plots_data): "HEAD", "tag1", "tag2", + "--from-config", + "path_to_config", ] ) assert cli_args.func == CmdPlotsDiff @@ -73,6 +81,7 @@ def test_plots_diff(dvc, mocker, plots_data): "y_label": "y_title", }, experiment=True, + config_files={"path_to_config"}, ) render_mock.assert_not_called() @@ -98,6 +107,7 @@ def test_plots_show_vega(dvc, mocker, plots_data): m = mocker.patch( "dvc.repo.plots.Plots.show", return_value=plots_data, + config_files=None, ) render_mock = mocker.patch( "dvc_render.render_html", return_value="html_path" @@ -108,6 +118,7 @@ def test_plots_show_vega(dvc, mocker, plots_data): m.assert_called_once_with( targets=["datafile"], props={"template": "template", "header": False}, + config_files=None, ) render_mock.assert_not_called() @@ -190,9 +201,7 @@ def test_plots_diff_open_failed(tmp_dir, dvc, mocker, capsys, plots_data): ["plots", "diff", "--targets", "plots.csv", "--open"] ) cmd = cli_args.func(cli_args) - mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": plots_data} - ) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) assert cmd.run() == 1 expected_url = tmp_dir / "dvc_plots" / "index.html" @@ -229,9 +238,7 @@ def test_plots_path_is_quoted_and_resolved_properly( ["plots", "diff", "--targets", "datafile", "--out", output] ) cmd = cli_args.func(cli_args) - mocker.patch( - "dvc.repo.plots.diff.diff", return_value={"datafile": plots_data} - ) + mocker.patch("dvc.repo.plots.diff.diff", return_value=plots_data) assert cmd.run() == 0 expected_url = posixpath.join(tmp_dir.as_uri(), expected_url_path) @@ -258,13 +265,13 @@ def test_should_pass_template_dir(tmp_dir, dvc, mocker, capsys): renderers = mocker.MagicMock() match_renderers = mocker.patch( - "dvc.render.match.match_renderers", return_value=renderers + "dvc.render.match.match_defs_renderers", return_value=renderers ) assert cmd.run() == 0 match_renderers.assert_called_once_with( - plots_data=data, + data=data, out="dvc_plots", templates_dir=str(tmp_dir / ".dvc/plots"), ) @@ -283,7 +290,9 @@ def test_should_call_render(tmp_dir, mocker, capsys, plots_data, output): output = output or "dvc_plots" index_path = tmp_dir / output / "index.html" renderers = mocker.MagicMock() - mocker.patch("dvc.render.match.match_renderers", return_value=renderers) + mocker.patch( + "dvc.render.match.match_defs_renderers", return_value=renderers + ) render_mock = mocker.patch( "dvc_render.render_html", return_value=index_path ) @@ -321,9 +330,15 @@ def test_plots_diff_json(dvc, mocker, capsys): mocker.patch("dvc.repo.plots.diff.diff", return_value=data) renderers = mocker.MagicMock() - mocker.patch("dvc.render.match.match_renderers", return_value=renderers) + mocker.patch( + "dvc.render.match.match_defs_renderers", return_value=renderers + ) render_mock = mocker.patch("dvc_render.render_html") + show_json_mock = mocker.patch( + "dvc.commands.plots._filter_unhandled_renderers", + return_value=renderers, + ) show_json_mock = mocker.patch("dvc.commands.plots._show_json") assert cmd.run() == 0 diff --git a/tests/unit/render/test_convert.py b/tests/unit/render/test_convert.py index 8f91d6d1fb..1f4294774d 100644 --- a/tests/unit/render/test_convert.py +++ b/tests/unit/render/test_convert.py @@ -1,159 +1,5 @@ -from dvc.render import ( - INDEX_FIELD, - REVISION_FIELD, - REVISIONS_KEY, - SRC_FIELD, - TYPE_KEY, -) -from dvc.render.convert import to_datapoints, to_json - - -def test_to_datapoints_single_revision(mocker): - renderer = mocker.MagicMock() - renderer.TYPE = "vega" - - input_data = { - "revision": { - "data": { - "filename": { - "data": { - "metric": [ - {"v": 1, "v2": 0.1, "v3": 0.01, "v4": 0.001}, - {"v": 2, "v2": 0.2, "v3": 0.02, "v4": 0.002}, - ] - } - } - } - } - } - props = {"fields": {"v"}, "x": "v2", "y": "v3"} - - datapoints, resolved_properties = to_datapoints( - renderer, input_data, props - ) - - assert datapoints == [ - { - "v": 1, - "v2": 0.1, - "v3": 0.01, - "rev": "revision", - "filename": "filename", - }, - { - "v": 2, - "v2": 0.2, - "v3": 0.02, - "rev": "revision", - "filename": "filename", - }, - ] - assert resolved_properties == { - "fields": {"v", "v2", "v3"}, - "x": "v2", - "y": "v3", - } - - -def test_to_datapoints_revision_with_error(mocker): - renderer = mocker.MagicMock() - renderer.TYPE = "vega" - - data = { - "v2": { - "data": {"file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}} - }, - "workspace": { - "data": {"file.json": {"error": FileNotFoundError(), "props": {}}} - }, - } - datapoints, final_props = to_datapoints(renderer, data, {}) - - assert datapoints == [ - { - "y": 2, - INDEX_FIELD: 0, - REVISION_FIELD: "v2", - "filename": "file.json", - }, - { - "y": 3, - INDEX_FIELD: 1, - REVISION_FIELD: "v2", - "filename": "file.json", - }, - ] - assert final_props == {"x": INDEX_FIELD, "y": "y"} - - -def test_to_datapoints_multiple_revisions(mocker): - renderer = mocker.MagicMock() - renderer.TYPE = "vega" - - metric_1 = [{"y": 2}, {"y": 3}] - metric_2 = [{"y": 3}, {"y": 5}] - metric_3 = [{"y": 5}, {"y": 6}] - - data = { - "HEAD": { - "data": { - "file.json": {"data": metric_3, "props": {"fields": {"y"}}} - } - }, - "v2": { - "data": { - "file.json": {"data": metric_2, "props": {"fields": {"y"}}} - } - }, - "v1": { - "data": { - "file.json": {"data": metric_1, "props": {"fields": {"y"}}} - } - }, - } - props = {"fields": {"y"}} - - datapoints, final_props = to_datapoints(renderer, data, props) - - assert datapoints == [ - { - "y": 5, - INDEX_FIELD: 0, - REVISION_FIELD: "HEAD", - "filename": "file.json", - }, - { - "y": 6, - INDEX_FIELD: 1, - REVISION_FIELD: "HEAD", - "filename": "file.json", - }, - { - "y": 3, - INDEX_FIELD: 0, - REVISION_FIELD: "v2", - "filename": "file.json", - }, - { - "y": 5, - INDEX_FIELD: 1, - REVISION_FIELD: "v2", - "filename": "file.json", - }, - { - "y": 2, - INDEX_FIELD: 0, - REVISION_FIELD: "v1", - "filename": "file.json", - }, - { - "y": 3, - INDEX_FIELD: 1, - REVISION_FIELD: "v1", - "filename": "file.json", - }, - ] - assert final_props == {"x": INDEX_FIELD, "y": "y", "fields": {"y", "step"}} +from dvc.render import REVISION_FIELD, REVISIONS_KEY, SRC_FIELD, TYPE_KEY +from dvc.render.convert import to_json def test_to_json_vega(mocker): diff --git a/tests/unit/render/test_image_converter.py b/tests/unit/render/test_image_converter.py index 818c987c2f..8912fad853 100644 --- a/tests/unit/render/test_image_converter.py +++ b/tests/unit/render/test_image_converter.py @@ -1,5 +1,5 @@ from dvc.render import REVISION_FIELD, SRC_FIELD -from dvc.render.image_converter import ImageConverter +from dvc.render.converter.image import ImageConverter def test_image_converter_no_out(): diff --git a/tests/unit/render/test_match.py b/tests/unit/render/test_match.py index 53ca64540a..3c00ddf9f0 100644 --- a/tests/unit/render/test_match.py +++ b/tests/unit/render/test_match.py @@ -1,183 +1,120 @@ +from dvc.render import VERSION_FIELD from dvc.render.match import ( - group_by_filename, - match_renderers, - squash_plots_properties, + PlotsData, + _squash_plots_properties, + match_defs_renderers, ) -def test_group_by_filename(): +def test_group_definitions(): error = FileNotFoundError() data = { - "v2": { - "data": { - "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, - "other_file.jpg": {"data": "content"}, - } - }, "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - "other_file.jpg": {"data": "content2"}, - } - }, - } - - results = group_by_filename(data) - assert results["file.json"] == { - "v2": { - "data": { - "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, - } - }, - "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - } - }, - } - assert results["other_file.jpg"] == { - "v2": { - "data": { - "other_file.jpg": {"data": "content"}, - } - }, - "workspace": { - "data": { - "other_file.jpg": {"data": "content2"}, + "definitions": { + "data": { + "config_file_1": { + "data": {"plot_id_1": {}, "plot_id_2": {}} + }, + "config_file_2": {"data": {"plot_id_3": {}}}, + } } }, - } - - -def test_squash_plots_properties(): - error = FileNotFoundError() - group = { "v2": { - "data": { - "file.json": { - "data": [{"y": 2}, {"y": 3}], - "props": {"foo": 1}, - }, - } - }, - "v1": { - "data": { - "file.json": { - "data": [{"y": 4}, {"y": 5}], - "props": {"bar": 1}, + "definitions": { + "data": { + "config_file_1": {"error": error}, + "config_file_2": {"data": {"plot_id_3": {}}}, } } }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - } - }, } - plot_properties = squash_plots_properties(group) - - assert plot_properties == {"foo": 1, "bar": 1} - + grouped = PlotsData(data).group_definitions() -def test_match_renderers_no_out(mocker): - from dvc import render - - vega_convert = mocker.spy(render.vega_converter.VegaConverter, "convert") - image_convert = mocker.spy( - render.image_converter.ImageConverter, "convert" - ) - image_encode = mocker.spy( - render.image_converter.ImageConverter, "_encode_image" - ) - image_write = mocker.spy( - render.image_converter.ImageConverter, "_write_image" - ) - - error = FileNotFoundError() - data = { - "v2": { - "data": { - "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, - "other_file.jpg": {"data": b"content"}, - } - }, - "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - "other_file.jpg": {"data": b"content2"}, - } - }, + assert grouped == { + "config_file_1::plot_id_1": [("v1", "plot_id_1", {})], + "config_file_1::plot_id_2": [("v1", "plot_id_2", {})], + "config_file_2::plot_id_3": [ + ("v1", "plot_id_3", {}), + ("v2", "plot_id_3", {}), + ], } - renderers = match_renderers(data) - - assert {r.TYPE for r in renderers} == {"vega", "image"} - vega_convert.assert_called() - image_convert.assert_called() - image_encode.assert_called() - image_write.assert_not_called() - -def test_match_renderers_with_out(tmp_dir, mocker): - from dvc import render - - image_encode = mocker.spy( - render.image_converter.ImageConverter, "_encode_image" - ) - image_write = mocker.spy( - render.image_converter.ImageConverter, "_write_image" - ) - - error = FileNotFoundError() +def test_match_renderers(mocker): data = { - "v2": { - "data": { - "file.json": {"data": [{"y": 2}, {"y": 3}], "props": {}}, - "other_file.jpg": {"data": b"content"}, - } - }, "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - "workspace": { - "data": { - "file.json": {"error": error, "props": {}}, - "other_file.jpg": {"data": b"content2"}, - } + "definitions": { + "data": { + "config_file_1": { + "data": { + "plot_id_1": { + "x": "x", + "y": {"file.json": "y"}, + } + } + } + }, + }, + "sources": { + "data": { + "file.json": {"data": [{"x": 1, "y": 1}, {"x": 2, "y": 2}]} + } + }, + }, + "errored_revision": { + "definitions": { + "data": {"config_file_1": {"error": FileNotFoundError()}}, + }, + "sources": {}, + }, + "revision_with_no_data": { + "definitions": { + "data": { + "config_file_1": { + "data": { + "plot_id_1": { + "x": "x", + "y": {"file.json": "y"}, + } + } + } + }, + }, + "sources": {"data": {"file.json": {"error": FileNotFoundError()}}}, }, } - match_renderers(data, out=tmp_dir / "foo") - - image_encode.assert_not_called() - image_write.assert_called() - - assert (tmp_dir / "foo" / "v2_other_file.jpg").read_bytes() == b"content" - assert ( - tmp_dir / "foo" / "workspace_other_file.jpg" - ).read_bytes() == b"content2" - + renderers = match_defs_renderers(data) + assert len(renderers) == 1 + assert renderers[0].datapoints == [ + { + VERSION_FIELD: {"revision": "v1", "filename": "file.json"}, + "rev": "v1", + "x": 1, + "y": 1, + }, + { + VERSION_FIELD: {"revision": "v1", "filename": "file.json"}, + "rev": "v1", + "x": 2, + "y": 2, + }, + ] + assert renderers[0].properties == { + "title": "config_file_1::plot_id_1", + "x": "x", + "y": "y", + } -def test_match_renderers_template_dir(mocker): - from dvc_render import vega - vega_render = mocker.spy(vega.VegaRenderer, "__init__") - data = { - "v1": { - "data": {"file.json": {"data": [{"y": 4}, {"y": 5}], "props": {}}} - }, - } +def test_squash_plots_properties(): + group = [ + ("v3", "config_file", "plot_id", {"foo": 1}), + ("v2", "config_file", "plot_id", {"foo": 2, "bar": 2}), + ("v1", "config_file", "plot_id", {"baz": 3}), + ] - match_renderers(data, templates_dir="foo") + plot_properties = _squash_plots_properties(group) - assert vega_render.call_args[1]["template_dir"] == "foo" + assert plot_properties == {"foo": 1, "bar": 2, "baz": 3} diff --git a/tests/unit/render/test_vega_converter.py b/tests/unit/render/test_vega_converter.py index d1b910195b..a6073d1160 100644 --- a/tests/unit/render/test_vega_converter.py +++ b/tests/unit/render/test_vega_converter.py @@ -2,7 +2,8 @@ import pytest -from dvc.render.vega_converter import ( +from dvc.render import VERSION_FIELD +from dvc.render.converter.vega import ( FieldsNotFoundError, PlotDataStructureError, VegaConverter, @@ -61,8 +62,18 @@ def test_finding_lists(dictionary, expected_result): {"metric": [{"v": 1}, {"v": 2}]}, {}, [ - {"v": 1, "step": 0, "filename": "f", "rev": "r"}, - {"v": 2, "step": 1, "filename": "f", "rev": "r"}, + { + "v": 1, + "step": 0, + VERSION_FIELD: {"revision": "r", "filename": "f"}, + "rev": "r", + }, + { + "v": 2, + "step": 1, + VERSION_FIELD: {"revision": "r", "filename": "f"}, + "rev": "r", + }, ], {"x": "step", "y": "v"}, ), @@ -71,8 +82,18 @@ def test_finding_lists(dictionary, expected_result): {"metric": [{"v": 1, "v2": 0.1}, {"v": 2, "v2": 0.2}]}, {"fields": {"v"}}, [ - {"v": 1, "step": 0, "filename": "f", "rev": "r"}, - {"v": 2, "step": 1, "filename": "f", "rev": "r"}, + { + "v": 1, + "step": 0, + VERSION_FIELD: {"revision": "r", "filename": "f"}, + "rev": "r", + }, + { + "v": 2, + "step": 1, + VERSION_FIELD: {"revision": "r", "filename": "f"}, + "rev": "r", + }, ], { "x": "step", @@ -85,8 +106,26 @@ def test_finding_lists(dictionary, expected_result): {"metric": [{"v": 1, "v2": 0.1}, {"v": 2, "v2": 0.2}]}, {"x": "v", "y": "v2"}, [ - {"v": 1, "v2": 0.1, "filename": "f", "rev": "r"}, - {"v": 2, "v2": 0.2, "filename": "f", "rev": "r"}, + { + "v": 1, + "v2": 0.1, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v2", + }, + "rev": "r", + }, + { + "v": 2, + "v2": 0.2, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v2", + }, + "rev": "r", + }, ], {"x": "v", "y": "v2"}, ), @@ -100,8 +139,28 @@ def test_finding_lists(dictionary, expected_result): }, {"x": "v3", "y": "v4", "fields": {"v"}}, [ - {"v": 1, "v3": 0.01, "v4": 0.001, "filename": "f", "rev": "r"}, - {"v": 2, "v3": 0.02, "v4": 0.002, "filename": "f", "rev": "r"}, + { + "v": 1, + "v3": 0.01, + "v4": 0.001, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v4", + }, + "rev": "r", + }, + { + "v": 2, + "v3": 0.02, + "v4": 0.002, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v4", + }, + "rev": "r", + }, ], {"x": "v3", "y": "v4", "fields": {"v", "v3", "v4"}}, ), @@ -117,11 +176,85 @@ def test_finding_lists(dictionary, expected_result): }, {"x": "v", "y": "v2"}, [ - {"v": 1, "v2": 0.1, "filename": "f", "rev": "r"}, - {"v": 2, "v2": 0.2, "filename": "f", "rev": "r"}, + { + "v": 1, + "v2": 0.1, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v2", + }, + "rev": "r", + }, + { + "v": 2, + "v2": 0.2, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v2", + }, + "rev": "r", + }, ], {"x": "v", "y": "v2"}, ), + ( + # provide list of fields in y def + {"metric": [{"v": 1, "v2": 0.1}, {"v": 2, "v2": 0.2}]}, + {"y": {"f": ["v", "v2"]}}, + [ + { + "v": 1, + "v2": 0.1, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v", + }, + "rev": "r", + "dvc_inferred_y_value": 1, + "step": 0, + }, + { + "v": 1, + "v2": 0.1, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v2", + }, + "rev": "r", + "dvc_inferred_y_value": 0.1, + "step": 0, + }, + { + "v": 2, + "v2": 0.2, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v", + }, + "rev": "r", + "dvc_inferred_y_value": 2, + "step": 1, + }, + { + "v": 2, + "v2": 0.2, + VERSION_FIELD: { + "revision": "r", + "filename": "f", + "field": "v2", + }, + "rev": "r", + "dvc_inferred_y_value": 0.2, + "step": 1, + }, + ], + {"x": "step", "y": "dvc_inferred_y_value", "y_label": "y"}, + ), ], ) def test_convert( @@ -150,7 +283,15 @@ def test_convert_skip_step(): ) assert datapoints == [ - {"v": 1, "filename": "f", "rev": "r"}, - {"v": 2, "filename": "f", "rev": "r"}, + { + "v": 1, + "rev": "r", + VERSION_FIELD: {"revision": "r", "filename": "f"}, + }, + { + "v": 2, + "rev": "r", + VERSION_FIELD: {"revision": "r", "filename": "f"}, + }, ] assert resolved_properties == {"x": "step", "y": "v"} diff --git a/tests/unit/test_plots.py b/tests/unit/test_plots.py deleted file mode 100644 index 9b9136c729..0000000000 --- a/tests/unit/test_plots.py +++ /dev/null @@ -1,34 +0,0 @@ -import json -import os - - -def test_plots_order(tmp_dir, dvc): - tmp_dir.gen( - { - "p.json": json.dumps([{"p1": 1}, {"p1": 2}]), - "p1.json": json.dumps([{"p2": 1}, {"p2": 2}]), - "sub": { - "p3.json": json.dumps([{"p3": 1}, {"p3": 2}]), - "p4.json": json.dumps([{"p4": 1}, {"p4": 2}]), - }, - } - ) - - dvc.stage.add( - plots=["p.json", str(tmp_dir / "sub" / "p4.json")], - cmd="cmd1", - name="stage1", - ) - with (tmp_dir / "sub").chdir(): - dvc.stage.add( - plots=[str(tmp_dir / "p1.json"), "p3.json"], - cmd="cmd2", - name="stage2", - ) - - assert list(dvc.plots.show()["workspace"]["data"]) == [ - "p.json", - os.path.join("sub", "p4.json"), - "p1.json", - os.path.join("sub", "p3.json"), - ] diff --git a/tests/utils/plots.py b/tests/utils/plots.py new file mode 100644 index 0000000000..aa8aa78427 --- /dev/null +++ b/tests/utils/plots.py @@ -0,0 +1,11 @@ +import dpath.util + +dpath.options.ALLOW_EMPTY_STRING_KEYS = True + + +def get_plot(plots_data, revision, typ="sources", file=None, endkey="data"): + if file is not None: + return dpath.util.get( + plots_data, [revision, typ, "data", file, endkey] + ) + return dpath.util.get(plots_data, [revision, typ, endkey])