From 876cf20321656d4a0386796f51d79a6e66ed0402 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 17 Oct 2022 11:07:03 +0200 Subject: [PATCH] live: Revisit output names and structure. (#322) * live: Revisit output names and structure. Applied https://github.com/iterative/dvclive/issues/246#issuecomment-1258582603 Closes #246 * Rename `log_plot` to `log_sklearn_plot`. * Rename `plot` -> `sklearn` * Rename `sklearn` -> `sklearn_plot` --- src/dvclive/data/__init__.py | 12 +++- src/dvclive/data/{scalar.py => metric.py} | 4 +- src/dvclive/data/{plot.py => sklearn_plot.py} | 14 ++--- src/dvclive/live.py | 28 +++++----- src/dvclive/report.py | 36 ++++++------ src/dvclive/studio.py | 6 +- src/dvclive/utils.py | 12 ++-- tests/test_catalyst.py | 6 +- tests/test_data/test_image.py | 56 +++++++++++-------- tests/test_data/test_plot.py | 34 +++++------ tests/test_data/test_scalar.py | 10 ++-- tests/test_fastai.py | 15 +++-- tests/test_huggingface.py | 11 ++-- tests/test_keras.py | 8 +-- tests/test_lgbm.py | 4 +- tests/test_lightning.py | 10 ++-- tests/test_main.py | 36 ++++++------ tests/test_report.py | 22 ++++---- tests/test_studio.py | 12 ++-- tests/test_xgboost.py | 4 +- 20 files changed, 184 insertions(+), 156 deletions(-) rename src/dvclive/data/{scalar.py => metric.py} (97%) rename src/dvclive/data/{plot.py => sklearn_plot.py} (95%) diff --git a/src/dvclive/data/__init__.py b/src/dvclive/data/__init__.py index 64825119..0829d446 100644 --- a/src/dvclive/data/__init__.py +++ b/src/dvclive/data/__init__.py @@ -1,6 +1,12 @@ from .image import Image -from .plot import Calibration, ConfusionMatrix, Det, PrecisionRecall, Roc -from .scalar import Scalar +from .metric import Metric +from .sklearn_plot import ( + Calibration, + ConfusionMatrix, + Det, + PrecisionRecall, + Roc, +) from .utils import NumpyEncoder # noqa: F401 PLOTS = { @@ -10,4 +16,4 @@ "precision_recall": PrecisionRecall, "roc": Roc, } -DATA_TYPES = (*PLOTS.values(), Scalar, Image) +DATA_TYPES = (*PLOTS.values(), Metric, Image) diff --git a/src/dvclive/data/scalar.py b/src/dvclive/data/metric.py similarity index 97% rename from src/dvclive/data/scalar.py rename to src/dvclive/data/metric.py index 4c462674..c1d965f6 100644 --- a/src/dvclive/data/scalar.py +++ b/src/dvclive/data/metric.py @@ -10,9 +10,9 @@ from .utils import NUMPY_SCALARS -class Scalar(Data): +class Metric(Data): suffixes = [".csv", ".tsv"] - subfolder = "scalars" + subfolder = "metrics" @staticmethod def could_log(val: object) -> bool: diff --git a/src/dvclive/data/plot.py b/src/dvclive/data/sklearn_plot.py similarity index 95% rename from src/dvclive/data/plot.py rename to src/dvclive/data/sklearn_plot.py index 7b3745f3..e227e8cd 100644 --- a/src/dvclive/data/plot.py +++ b/src/dvclive/data/sklearn_plot.py @@ -4,9 +4,9 @@ from .base import Data -class Plot(Data): +class SKLearnPlot(Data): suffixes = [".json"] - subfolder = "plots" + subfolder = "sklearn" @property def output_path(self) -> Path: @@ -51,7 +51,7 @@ def get_properties(): raise NotImplementedError -class Roc(Plot): +class Roc(SKLearnPlot): @staticmethod def get_properties(): return { @@ -79,7 +79,7 @@ def no_step_dump(self) -> None: self.write_json(roc, self.output_path) -class PrecisionRecall(Plot): +class PrecisionRecall(SKLearnPlot): @staticmethod def get_properties(): return { @@ -108,7 +108,7 @@ def no_step_dump(self) -> None: self.write_json(prc, self.output_path) -class Det(Plot): +class Det(SKLearnPlot): @staticmethod def get_properties(): return { @@ -137,7 +137,7 @@ def no_step_dump(self) -> None: self.write_json(det, self.output_path) -class ConfusionMatrix(Plot): +class ConfusionMatrix(SKLearnPlot): @staticmethod def get_properties(): return { @@ -159,7 +159,7 @@ def no_step_dump(self) -> None: self.write_json(cm, self.output_path) -class Calibration(Plot): +class Calibration(SKLearnPlot): @staticmethod def get_properties(): return { diff --git a/src/dvclive/live.py b/src/dvclive/live.py index a8858e44..278a55e7 100644 --- a/src/dvclive/live.py +++ b/src/dvclive/live.py @@ -10,7 +10,7 @@ from ruamel.yaml.representer import RepresenterError from . import env -from .data import DATA_TYPES, PLOTS, Image, NumpyEncoder, Scalar +from .data import DATA_TYPES, PLOTS, Image, Metric, NumpyEncoder from .dvc import make_checkpoint from .error import ( ConfigMismatchError, @@ -109,10 +109,10 @@ def __init__( def _cleanup(self): for data_type in DATA_TYPES: shutil.rmtree( - Path(self.dir) / data_type.subfolder, ignore_errors=True + Path(self.plots_path) / data_type.subfolder, ignore_errors=True ) - for f in (self.summary_path, self.report_path, self.params_path): + for f in (self.metrics_path, self.report_path, self.params_path): if os.path.exists(f): os.remove(f) @@ -153,12 +153,12 @@ def params_path(self): return os.path.join(self.dir, "params.yaml") @property - def exists(self): - return os.path.isdir(self.dir) + def metrics_path(self): + return os.path.join(self.dir, "metrics.json") @property - def summary_path(self): - return str(self.dir) + ".json" + def plots_path(self): + return os.path.join(self.dir, "plots") def get_step(self) -> int: return self._step or 0 @@ -194,13 +194,13 @@ def next_step(self): self.set_step(self.get_step() + 1) def log(self, name: str, val: Union[int, float]): - if not Scalar.could_log(val): + if not Metric.could_log(val): raise InvalidDataTypeError(name, type(val)) if name in self._scalars: data = self._scalars[name] else: - data = Scalar(name, self.dir) + data = Metric(name, self.plots_path) self._scalars[name] = data data.dump(val, self._step) @@ -215,19 +215,19 @@ def log_image(self, name: str, val): if name in self._images: data = self._images[name] else: - data = Image(name, self.dir) + data = Image(name, self.plots_path) self._images[name] = data data.dump(val, self._step) logger.debug(f"Logged {name}: {val}") - def log_plot(self, name, labels, predictions, **kwargs): + def log_sklearn_plot(self, name, labels, predictions, **kwargs): val = (labels, predictions) if name in self._plots: data = self._plots[name] elif name in PLOTS and PLOTS[name].could_log(val): - data = PLOTS[name](name, self.dir) + data = PLOTS[name](name, self.plots_path) self._plots[name] = data else: raise InvalidPlotTypeError(name) @@ -268,7 +268,7 @@ def make_summary(self): for data in self._scalars.values(): summary_data = nested_update(summary_data, data.summary) - with open(self.summary_path, "w", encoding="utf-8") as f: + with open(self.metrics_path, "w", encoding="utf-8") as f: json.dump(summary_data, f, indent=4, cls=NumpyEncoder) def make_report(self): @@ -287,7 +287,7 @@ def make_checkpoint(self): make_checkpoint() def read_step(self): - if Path(self.summary_path).exists(): + if Path(self.metrics_path).exists(): latest = self.read_latest() return latest.get("step", 0) return 0 diff --git a/src/dvclive/report.py b/src/dvclive/report.py index ba6a85b8..54f3cc7e 100644 --- a/src/dvclive/report.py +++ b/src/dvclive/report.py @@ -8,8 +8,8 @@ from dvc_render.table import TableRenderer from dvc_render.vega import VegaRenderer -from dvclive.data import PLOTS, Image, Scalar -from dvclive.data.plot import Plot +from dvclive.data import PLOTS, Image, Metric +from dvclive.data.sklearn_plot import SKLearnPlot from dvclive.serialize import load_yaml from dvclive.utils import parse_tsv @@ -17,20 +17,20 @@ from dvclive import Live -def get_scalar_renderers(scalars_folder): +def get_scalar_renderers(metrics_path): renderers = [] - for suffix in Scalar.suffixes: - for file in Path(scalars_folder).rglob(f"*{suffix}"): + for suffix in Metric.suffixes: + for file in metrics_path.rglob(f"*{suffix}"): data = parse_tsv(file) for row in data: row["rev"] = "workspace" - y = file.relative_to(scalars_folder).with_suffix("") + y = file.relative_to(metrics_path).with_suffix("") y = y.as_posix() - name = file.relative_to(scalars_folder.parent).with_suffix("") + name = file.relative_to(metrics_path.parent).with_suffix("") name = name.as_posix() - name = name.replace(scalars_folder.name, "static") + name = name.replace(metrics_path.name, "static") properties = {"x": "step", "y": y} renderers.append(VegaRenderer(data, name, **properties)) @@ -57,7 +57,7 @@ def get_image_renderers(images_folder): def get_plot_renderers(plots_folder): renderers = [] - for suffix in Plot.suffixes: + for suffix in SKLearnPlot.suffixes: for file in Path(plots_folder).rglob(f"*{suffix}"): name = file.stem data = json.loads(file.read_text()) @@ -71,12 +71,12 @@ def get_plot_renderers(plots_folder): def get_metrics_renderers(dvclive_summary): - summary_path = Path(dvclive_summary) - if summary_path.exists(): + metrics_path = Path(dvclive_summary) + if metrics_path.exists(): return [ TableRenderer( - [json.loads(summary_path.read_text(encoding="utf-8"))], - summary_path.name, + [json.loads(metrics_path.read_text(encoding="utf-8"))], + metrics_path.name, ) ] return [] @@ -95,14 +95,14 @@ def get_params_renderers(dvclive_params): def make_report(dvclive: "Live"): - dvclive_path = Path(dvclive.dir) + plots_path = Path(dvclive.plots_path) renderers = [] renderers.extend(get_params_renderers(dvclive.params_path)) - renderers.extend(get_metrics_renderers(dvclive.summary_path)) - renderers.extend(get_scalar_renderers(dvclive_path / Scalar.subfolder)) - renderers.extend(get_image_renderers(dvclive_path / Image.subfolder)) - renderers.extend(get_plot_renderers(dvclive_path / Plot.subfolder)) + renderers.extend(get_metrics_renderers(dvclive.metrics_path)) + renderers.extend(get_scalar_renderers(plots_path / Metric.subfolder)) + renderers.extend(get_image_renderers(plots_path / Image.subfolder)) + renderers.extend(get_plot_renderers(plots_path / SKLearnPlot.subfolder)) if dvclive.report_mode == "html": render_html(renderers, dvclive.report_path, refresh_seconds=5) diff --git a/src/dvclive/studio.py b/src/dvclive/studio.py index 35aed191..49e469e2 100644 --- a/src/dvclive/studio.py +++ b/src/dvclive/studio.py @@ -1,7 +1,7 @@ from os import getenv from dvclive.env import STUDIO_ENDPOINT -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics def _get_unsent_datapoints(plot, latest_step): @@ -28,14 +28,14 @@ def _to_dvc_format(plots): def _get_updates(live): - plots, metrics = parse_scalars(live) + plots, metrics = parse_metrics(live) latest_step = live._latest_studio_step # pylint: disable=protected-access for name, plot in plots.items(): datapoints = _get_unsent_datapoints(plot, latest_step) plots[name] = _cast_to_numbers(datapoints) - metrics = {live.summary_path: {"data": metrics}} + metrics = {live.metrics_path: {"data": metrics}} plots = _to_dvc_format(plots) return metrics, plots diff --git a/src/dvclive/utils.py b/src/dvclive/utils.py index debff7ff..2caf8f01 100644 --- a/src/dvclive/utils.py +++ b/src/dvclive/utils.py @@ -107,13 +107,13 @@ def parse_json(path): return json.load(fd) -def parse_scalars(live): - from .data import Scalar +def parse_metrics(live): + from .data import Metric - live_dir = Path(live.dir) + plots_path = Path(live.plots_path) history = {} - for suffix in Scalar.suffixes: - for scalar_file in live_dir.rglob(f"*{suffix}"): + for suffix in Metric.suffixes: + for scalar_file in plots_path.rglob(f"*{suffix}"): history[str(scalar_file)] = parse_tsv(scalar_file) - latest = parse_json(live.summary_path) + latest = parse_json(live.metrics_path) return history, latest diff --git a/tests/test_catalyst.py b/tests/test_catalyst.py index dbb83634..a3bb4aac 100644 --- a/tests/test_catalyst.py +++ b/tests/test_catalyst.py @@ -7,7 +7,7 @@ from dvclive import Live from dvclive.catalyst import DvcLiveCallback -from dvclive.data import Scalar +from dvclive.data import Metric # pylint: disable=redefined-outer-name, unused-argument @@ -67,8 +67,8 @@ def test_catalyst_callback(tmp_dir, runner, runner_params): assert os.path.exists("dvclive") - train_path = tmp_dir / "dvclive" / Scalar.subfolder / "train" - valid_path = tmp_dir / "dvclive" / Scalar.subfolder / "valid" + train_path = tmp_dir / "dvclive" / "plots" / Metric.subfolder / "train" + valid_path = tmp_dir / "dvclive" / "plots" / Metric.subfolder / "valid" assert train_path.is_dir() assert valid_path.is_dir() diff --git a/tests/test_data/test_image.py b/tests/test_data/test_image.py index 98b89a2c..093060b4 100644 --- a/tests/test_data/test_image.py +++ b/tests/test_data/test_image.py @@ -8,40 +8,44 @@ def test_PIL(tmp_dir): - dvclive = Live() + live = Live() img = Image.new("RGB", (500, 500), (250, 250, 250)) - dvclive.log_image("image.png", img) + live.log_image("image.png", img) - assert (tmp_dir / dvclive.dir / LiveImage.subfolder / "image.png").exists() + assert ( + tmp_dir / live.plots_path / LiveImage.subfolder / "image.png" + ).exists() def test_invalid_extension(tmp_dir): - dvclive = Live() + live = Live() img = Image.new("RGB", (500, 500), (250, 250, 250)) with pytest.raises(ValueError): - dvclive.log_image("image.foo", img) + live.log_image("image.foo", img) @pytest.mark.parametrize("shape", [(500, 500), (500, 500, 3), (500, 500, 4)]) def test_numpy(tmp_dir, shape): - dvclive = Live() + live = Live() img = np.ones(shape, np.uint8) * 255 - dvclive.log_image("image.png", img) + live.log_image("image.png", img) - assert (tmp_dir / dvclive.dir / LiveImage.subfolder / "image.png").exists() + assert ( + tmp_dir / live.plots_path / LiveImage.subfolder / "image.png" + ).exists() def test_step_formatting(tmp_dir): - dvclive = Live() + live = Live() img = np.ones((500, 500, 3), np.uint8) for _ in range(3): - dvclive.log_image("image.png", img) - dvclive.next_step() + live.log_image("image.png", img) + live.next_step() for step in range(3): assert ( tmp_dir - / dvclive.dir + / live.plots_path / LiveImage.subfolder / str(step) / "image.png" @@ -52,32 +56,36 @@ def test_step_rename(tmp_dir, mocker): from pathlib import Path rename = mocker.spy(Path, "rename") - dvclive = Live() + live = Live() img = np.ones((500, 500, 3), np.uint8) - dvclive.log_image("image.png", img) - assert (tmp_dir / dvclive.dir / LiveImage.subfolder / "image.png").exists() + live.log_image("image.png", img) + assert ( + tmp_dir / live.plots_path / LiveImage.subfolder / "image.png" + ).exists() - dvclive.next_step() + live.next_step() assert not ( - tmp_dir / dvclive.dir / LiveImage.subfolder / "image.png" + tmp_dir / live.plots_path / LiveImage.subfolder / "image.png" ).exists() assert ( - tmp_dir / dvclive.dir / LiveImage.subfolder / "0" / "image.png" + tmp_dir / live.plots_path / LiveImage.subfolder / "0" / "image.png" ).exists() rename.assert_called_once_with( - Path(dvclive.dir) / LiveImage.subfolder / "image.png", - Path(dvclive.dir) / LiveImage.subfolder / "0" / "image.png", + Path(live.plots_path) / LiveImage.subfolder / "image.png", + Path(live.plots_path) / LiveImage.subfolder / "0" / "image.png", ) def test_cleanup(tmp_dir): - dvclive = Live() + live = Live() img = np.ones((500, 500, 3), np.uint8) - dvclive.log_image("image.png", img) + live.log_image("image.png", img) - assert (tmp_dir / dvclive.dir / LiveImage.subfolder / "image.png").exists() + assert ( + tmp_dir / live.plots_path / LiveImage.subfolder / "image.png" + ).exists() Live() - assert not (tmp_dir / dvclive.dir / LiveImage.subfolder).exists() + assert not (tmp_dir / live.plots_path / LiveImage.subfolder).exists() diff --git a/tests/test_data/test_plot.py b/tests/test_data/test_plot.py index c2401b32..d00a5d6e 100644 --- a/tests/test_data/test_plot.py +++ b/tests/test_data/test_plot.py @@ -4,7 +4,7 @@ from sklearn import calibration, metrics from dvclive import Live -from dvclive.data.plot import Plot +from dvclive.data.sklearn_plot import SKLearnPlot # pylint: disable=redefined-outer-name, unused-argument @@ -28,13 +28,13 @@ def y_true_y_pred_y_score(): def test_log_calibration_curve(tmp_dir, y_true_y_pred_y_score, mocker): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, _, y_score = y_true_y_pred_y_score spy = mocker.spy(calibration, "calibration_curve") - live.log_plot("calibration", y_true, y_score) + live.log_sklearn_plot("calibration", y_true, y_score) spy.assert_called_once_with(y_true, y_score) @@ -43,13 +43,13 @@ def test_log_calibration_curve(tmp_dir, y_true_y_pred_y_score, mocker): def test_log_det_curve(tmp_dir, y_true_y_pred_y_score, mocker): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, _, y_score = y_true_y_pred_y_score spy = mocker.spy(metrics, "det_curve") - live.log_plot("det", y_true, y_score) + live.log_sklearn_plot("det", y_true, y_score) spy.assert_called_once_with(y_true, y_score) assert (out / "det.json").exists() @@ -57,13 +57,13 @@ def test_log_det_curve(tmp_dir, y_true_y_pred_y_score, mocker): def test_log_roc_curve(tmp_dir, y_true_y_pred_y_score, mocker): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, _, y_score = y_true_y_pred_y_score spy = mocker.spy(metrics, "roc_curve") - live.log_plot("roc", y_true, y_score) + live.log_sklearn_plot("roc", y_true, y_score) spy.assert_called_once_with(y_true, y_score) assert (out / "roc.json").exists() @@ -71,13 +71,13 @@ def test_log_roc_curve(tmp_dir, y_true_y_pred_y_score, mocker): def test_log_prc_curve(tmp_dir, y_true_y_pred_y_score, mocker): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, _, y_score = y_true_y_pred_y_score spy = mocker.spy(metrics, "precision_recall_curve") - live.log_plot("precision_recall", y_true, y_score) + live.log_sklearn_plot("precision_recall", y_true, y_score) spy.assert_called_once_with(y_true, y_score) assert (out / "precision_recall.json").exists() @@ -85,11 +85,11 @@ def test_log_prc_curve(tmp_dir, y_true_y_pred_y_score, mocker): def test_log_confusion_matrix(tmp_dir, y_true_y_pred_y_score, mocker): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, y_pred, _ = y_true_y_pred_y_score - live.log_plot("confusion_matrix", y_true, y_pred) + live.log_sklearn_plot("confusion_matrix", y_true, y_pred) cm = json.loads((out / "confusion_matrix.json").read_text()) @@ -101,11 +101,11 @@ def test_log_confusion_matrix(tmp_dir, y_true_y_pred_y_score, mocker): def test_step_exception(tmp_dir, y_true_y_pred_y_score): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, y_pred, _ = y_true_y_pred_y_score - live.log_plot("confusion_matrix", y_true, y_pred) + live.log_sklearn_plot("confusion_matrix", y_true, y_pred) assert (out / "confusion_matrix.json").exists() with pytest.raises(NotImplementedError): @@ -119,21 +119,21 @@ def test_dump_kwargs(tmp_dir, y_true_y_pred_y_score, mocker): spy = mocker.spy(metrics, "roc_curve") - live.log_plot("roc", y_true, y_score, drop_intermediate=True) + live.log_sklearn_plot("roc", y_true, y_score, drop_intermediate=True) spy.assert_called_once_with(y_true, y_score, drop_intermediate=True) def test_cleanup(tmp_dir, y_true_y_pred_y_score): live = Live() - out = tmp_dir / live.dir / Plot.subfolder + out = tmp_dir / live.plots_path / SKLearnPlot.subfolder y_true, y_pred, _ = y_true_y_pred_y_score - live.log_plot("confusion_matrix", y_true, y_pred) + live.log_sklearn_plot("confusion_matrix", y_true, y_pred) assert (out / "confusion_matrix.json").exists() Live() - assert not (tmp_dir / live.dir / Plot.subfolder).exists() + assert not (tmp_dir / live.plots_path / SKLearnPlot.subfolder).exists() diff --git a/tests/test_data/test_scalar.py b/tests/test_data/test_scalar.py index 1fc51c13..f585e91c 100644 --- a/tests/test_data/test_scalar.py +++ b/tests/test_data/test_scalar.py @@ -5,7 +5,7 @@ # pylint: disable=unused-argument from dvclive import Live -from dvclive.data.scalar import Scalar +from dvclive.data.metric import Metric from dvclive.data.utils import NUMPY_INTS, NUMPY_SCALARS from dvclive.utils import parse_tsv @@ -18,9 +18,9 @@ def test_numpy(tmp_dir, dtype): live.log("scalar", scalar) live.next_step() - parsed = json.loads((tmp_dir / live.summary_path).read_text()) + parsed = json.loads((tmp_dir / live.metrics_path).read_text()) assert isinstance(parsed["scalar"], int if dtype in NUMPY_INTS else float) - tsv_file = tmp_dir / live.dir / Scalar.subfolder / "scalar.tsv" + tsv_file = tmp_dir / live.plots_path / Metric.subfolder / "scalar.tsv" tsv_val = parse_tsv(tsv_file)[0]["scalar"] assert tsv_val == str(scalar) @@ -32,7 +32,9 @@ def test_name_with_dot(tmp_dir): live.log("scalar.foo.bar", 1.0) live.next_step() - tsv_file = tmp_dir / live.dir / Scalar.subfolder / "scalar.foo.bar.tsv" + tsv_file = ( + tmp_dir / live.plots_path / Metric.subfolder / "scalar.foo.bar.tsv" + ) assert tsv_file.exists() tsv_val = parse_tsv(tsv_file)[0]["scalar.foo.bar"] assert tsv_val == "1.0" diff --git a/tests/test_fastai.py b/tests/test_fastai.py index c4d501cd..13573fd0 100644 --- a/tests/test_fastai.py +++ b/tests/test_fastai.py @@ -10,7 +10,7 @@ ) from dvclive import Live -from dvclive.data.scalar import Scalar +from dvclive.data.metric import Metric from dvclive.fastai import DvcLiveCallback # pylint: disable=redefined-outer-name, unused-argument @@ -40,16 +40,19 @@ def data_loader(): def test_fastai_callback(tmp_dir, data_loader): learn = tabular_learner(data_loader, metrics=accuracy) learn.model_dir = os.path.abspath("./") - learn.fit_one_cycle(2, cbs=[DvcLiveCallback("model")]) + callback = DvcLiveCallback("model") + live = callback.dvclive + learn.fit_one_cycle(2, cbs=[callback]) - assert os.path.exists("dvclive") + assert os.path.exists(live.dir) - train_path = tmp_dir / "dvclive" / Scalar.subfolder / "train" - valid_path = tmp_dir / "dvclive" / Scalar.subfolder / "eval" + metrics_path = tmp_dir / live.plots_path / Metric.subfolder + train_path = metrics_path / "train" + valid_path = metrics_path / "eval" assert train_path.is_dir() assert valid_path.is_dir() - assert (tmp_dir / "dvclive" / Scalar.subfolder / "accuracy.tsv").exists() + assert (metrics_path / "accuracy.tsv").exists() def test_fastai_model_file(tmp_dir, data_loader): diff --git a/tests/test_huggingface.py b/tests/test_huggingface.py index 7befa896..2e1ce141 100644 --- a/tests/test_huggingface.py +++ b/tests/test_huggingface.py @@ -12,9 +12,9 @@ ) from dvclive import Live -from dvclive.data.scalar import Scalar +from dvclive.data.metric import Metric from dvclive.huggingface import DvcLiveCallback -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics # pylint: disable=redefined-outer-name, unused-argument, no-value-for-parameter @@ -113,13 +113,14 @@ def test_huggingface_integration(tmp_dir, model, args, data): trainer.add_callback(callback) trainer.train() - assert os.path.exists("dvclive") + live = callback.dvclive + assert os.path.exists(live.dir) - logs, _ = parse_scalars(callback.dvclive) + logs, _ = parse_metrics(live) assert len(logs) == 10 - scalars = os.path.join("dvclive", Scalar.subfolder) + scalars = os.path.join(live.plots_path, Metric.subfolder) assert os.path.join(scalars, "eval", "foo.tsv") in logs assert os.path.join(scalars, "eval", "loss.tsv") in logs assert os.path.join(scalars, "train", "loss.tsv") in logs diff --git a/tests/test_keras.py b/tests/test_keras.py index d3a1c0d9..33c9fe75 100644 --- a/tests/test_keras.py +++ b/tests/test_keras.py @@ -3,9 +3,9 @@ import pytest from dvclive import Live -from dvclive.data.scalar import Scalar +from dvclive.data.metric import Metric from dvclive.keras import DvcLiveCallback -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics # pylint: disable=unused-argument, no-name-in-module, redefined-outer-name @@ -49,9 +49,9 @@ def test_keras_callback(tmp_dir, xor_model, capture_wrap): ) assert os.path.exists("dvclive") - logs, _ = parse_scalars(callback.dvclive) + logs, _ = parse_metrics(callback.dvclive) - scalars = os.path.join("dvclive", Scalar.subfolder) + scalars = os.path.join(callback.dvclive.plots_path, Metric.subfolder) assert os.path.join(scalars, "train", "accuracy.tsv") in logs assert os.path.join(scalars, "eval", "accuracy.tsv") in logs diff --git a/tests/test_lgbm.py b/tests/test_lgbm.py index ad29c952..505d17e4 100644 --- a/tests/test_lgbm.py +++ b/tests/test_lgbm.py @@ -10,7 +10,7 @@ from dvclive import Live from dvclive.lgbm import DvcLiveCallback -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics # pylint: disable=redefined-outer-name, unused-argument @@ -49,7 +49,7 @@ def test_lgbm_integration(tmp_dir, model_params, iris_data): assert os.path.exists("dvclive") - logs, _ = parse_scalars(callback.dvclive) + logs, _ = parse_metrics(callback.dvclive) assert len(logs) == 1 assert len(list(logs.values())[0]) == 5 diff --git a/tests/test_lightning.py b/tests/test_lightning.py index 793ed74f..52dd6b22 100644 --- a/tests/test_lightning.py +++ b/tests/test_lightning.py @@ -8,9 +8,9 @@ from torch.optim import Adam from torch.utils.data import DataLoader, Dataset -from dvclive.data.scalar import Scalar +from dvclive.data.metric import Metric from dvclive.lightning import DvcLiveLogger -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics # pylint: disable=redefined-outer-name, unused-argument @@ -100,8 +100,10 @@ def test_lightning_integration(tmp_dir): assert os.path.exists("logs") assert not os.path.exists("DvcLiveLogger") - scalars = os.path.join(dvclive_logger.experiment.dir, Scalar.subfolder) - logs, _ = parse_scalars(dvclive_logger.experiment) + scalars = os.path.join( + dvclive_logger.experiment.plots_path, Metric.subfolder + ) + logs, _ = parse_metrics(dvclive_logger.experiment) assert len(logs) == 3 assert os.path.join(scalars, "train", "epoch", "loss.tsv") in logs diff --git a/tests/test_main.py b/tests/test_main.py index 69e8d913..6437eee4 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -6,7 +6,7 @@ from funcy import last from dvclive import Live, env -from dvclive.data import Scalar +from dvclive.data import Metric from dvclive.error import ( ConfigMismatchError, DataAlreadyLoggedError, @@ -14,14 +14,14 @@ InvalidParameterTypeError, ) from dvclive.serialize import load_yaml -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics def read_history(live, metric): - history, _ = parse_scalars(live) + history, _ = parse_metrics(live) steps = [] values = [] - name = os.path.join(live.dir, Scalar.subfolder, f"{metric}.tsv") + name = os.path.join(live.plots_path, Metric.subfolder, f"{metric}.tsv") for e in history[name]: steps.append(int(e["step"])) values.append(float(e[metric])) @@ -29,7 +29,7 @@ def read_history(live, metric): def read_latest(live, metric_name): - _, latest = parse_scalars(live) + _, latest = parse_metrics(live) return latest["step"], latest[metric_name] @@ -39,9 +39,9 @@ def test_logging_no_step(tmp_dir): dvclive.log("m1", 1) assert not (tmp_dir / "logs" / "m1.tsv").is_file() - assert (tmp_dir / dvclive.summary_path).is_file() + assert (tmp_dir / dvclive.metrics_path).is_file() - s = load_yaml(dvclive.summary_path) + s = load_yaml(dvclive.metrics_path) assert s["m1"] == 1 assert "step" not in s @@ -123,10 +123,12 @@ def test_logging_step(tmp_dir, path): dvclive.log("m1", 1) dvclive.next_step() assert (tmp_dir / dvclive.dir).is_dir() - assert (tmp_dir / dvclive.dir / Scalar.subfolder / "m1.tsv").is_file() - assert (tmp_dir / dvclive.summary_path).is_file() + assert ( + tmp_dir / dvclive.plots_path / Metric.subfolder / "m1.tsv" + ).is_file() + assert (tmp_dir / dvclive.metrics_path).is_file() - s = load_yaml(dvclive.summary_path) + s = load_yaml(dvclive.metrics_path) assert s["m1"] == 1 assert s["step"] == 0 @@ -134,7 +136,7 @@ def test_logging_step(tmp_dir, path): def test_nested_logging(tmp_dir): dvclive = Live("logs") - out = tmp_dir / dvclive.dir / Scalar.subfolder + out = tmp_dir / dvclive.plots_path / Metric.subfolder dvclive.log("train/m1", 1) dvclive.log("val/val_1/m1", 1) @@ -147,7 +149,7 @@ def test_nested_logging(tmp_dir): assert (out / "val" / "val_1" / "m1.tsv").is_file() assert (out / "val" / "val_1" / "m2.tsv").is_file() - summary = load_yaml(dvclive.summary_path) + summary = load_yaml(dvclive.metrics_path) assert summary["train"]["m1"] == 1 assert summary["val"]["val_1"]["m1"] == 1 @@ -169,15 +171,17 @@ def test_cleanup(tmp_dir, html): (tmp_dir / "logs" / "some_user_file.txt").touch() - assert (tmp_dir / dvclive.dir / Scalar.subfolder / "m1.tsv").is_file() - assert (tmp_dir / dvclive.summary_path).is_file() + assert ( + tmp_dir / dvclive.plots_path / Metric.subfolder / "m1.tsv" + ).is_file() + assert (tmp_dir / dvclive.metrics_path).is_file() assert html_path.is_file() == html dvclive = Live("logs") assert (tmp_dir / "logs" / "some_user_file.txt").is_file() - assert not (tmp_dir / dvclive.dir / Scalar.subfolder).exists() - assert not (tmp_dir / dvclive.summary_path).is_file() + assert not (tmp_dir / dvclive.plots_path / Metric.subfolder).exists() + assert not (tmp_dir / dvclive.metrics_path).is_file() assert not (html_path).is_file() diff --git a/tests/test_report.py b/tests/test_report.py index 6f8cdd37..e1d8db12 100644 --- a/tests/test_report.py +++ b/tests/test_report.py @@ -6,8 +6,8 @@ from dvclive import Live from dvclive.data import Image as LiveImage -from dvclive.data import Scalar -from dvclive.data.plot import ConfusionMatrix, Plot +from dvclive.data import Metric +from dvclive.data.sklearn_plot import ConfusionMatrix, SKLearnPlot from dvclive.env import DVCLIVE_OPEN from dvclive.report import ( get_image_renderers, @@ -31,10 +31,10 @@ def test_get_renderers(tmp_dir, mocker): live.next_step() live.set_step(None) - live.log_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1]) + live.log_sklearn_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1]) image_renderers = get_image_renderers( - tmp_dir / live.dir / LiveImage.subfolder + tmp_dir / live.plots_path / LiveImage.subfolder ) assert len(image_renderers) == 2 image_renderers = sorted( @@ -46,7 +46,7 @@ def test_get_renderers(tmp_dir, mocker): ] scalar_renderers = get_scalar_renderers( - tmp_dir / live.dir / Scalar.subfolder + tmp_dir / live.plots_path / Metric.subfolder ) assert len(scalar_renderers) == 1 assert scalar_renderers[0].datapoints == [ @@ -66,7 +66,9 @@ def test_get_renderers(tmp_dir, mocker): assert scalar_renderers[0].properties["y"] == "foo/bar" assert scalar_renderers[0].name == "static/foo/bar" - plot_renderers = get_plot_renderers(tmp_dir / live.dir / Plot.subfolder) + plot_renderers = get_plot_renderers( + tmp_dir / live.plots_path / SKLearnPlot.subfolder + ) assert len(plot_renderers) == 1 assert plot_renderers[0].datapoints == [ {"actual": "0", "rev": "workspace", "predicted": "1"}, @@ -76,7 +78,7 @@ def test_get_renderers(tmp_dir, mocker): ] assert plot_renderers[0].properties == ConfusionMatrix.get_properties() - metrics_renderer = get_metrics_renderers(live.summary_path)[0] + metrics_renderer = get_metrics_renderers(live.metrics_path)[0] assert metrics_renderer.datapoints == [{"step": 1, "foo": {"bar": 1}}] params_renderer = get_params_renderers(live.params_path)[0] @@ -101,7 +103,7 @@ def test_report_init(monkeypatch): @pytest.mark.parametrize("mode", ["html", "md"]) -def test_make_report(tmp_dir, mode): +def test_make_report(tmp_dir, mode, mocker): live = Live(report=mode) for i in range(3): live.log("foobar", i) @@ -117,7 +119,7 @@ def test_make_report(tmp_dir, mode): def test_make_report_open(tmp_dir, mocker, monkeypatch): mocked_open = mocker.patch("webbrowser.open") live = Live() - live.log_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1]) + live.log_sklearn_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1]) live.make_report() live.make_report() @@ -132,7 +134,7 @@ def test_make_report_open(tmp_dir, mocker, monkeypatch): monkeypatch.setenv(DVCLIVE_OPEN, True) live = Live() - live.log_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1]) + live.log_sklearn_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1]) live.make_report() mocked_open.assert_called_once() diff --git a/tests/test_studio.py b/tests/test_studio.py index ad5b0714..4c18deab 100644 --- a/tests/test_studio.py +++ b/tests/test_studio.py @@ -5,7 +5,7 @@ import pytest from dvclive import Live, env -from dvclive.data import Scalar +from dvclive.data import Metric @pytest.mark.studio @@ -20,7 +20,7 @@ def test_post_to_studio(tmp_dir, mocker, monkeypatch): live = Live() - scalar_path = os.path.join(live.dir, Scalar.subfolder, "foo.tsv") + scalar_path = os.path.join(live.plots_path, Metric.subfolder, "foo.tsv") mocked_post.assert_called_with( "https://0.0.0.0", @@ -47,7 +47,7 @@ def test_post_to_studio(tmp_dir, mocker, monkeypatch): "repo_url": "STUDIO_REPO_URL", "rev": mocker.ANY, "step": 0, - "metrics": {"dvclive.json": {"data": {"step": 0, "foo": 1}}}, + "metrics": {live.metrics_path: {"data": {"step": 0, "foo": 1}}}, "plots": { scalar_path: { "data": [{"timestamp": mocker.ANY, "step": 0, "foo": 1.0}] @@ -72,7 +72,7 @@ def test_post_to_studio(tmp_dir, mocker, monkeypatch): "repo_url": "STUDIO_REPO_URL", "rev": mocker.ANY, "step": 1, - "metrics": {"dvclive.json": {"data": {"step": 1, "foo": 2}}}, + "metrics": {live.metrics_path: {"data": {"step": 1, "foo": 2}}}, "plots": { scalar_path: { "data": [{"timestamp": mocker.ANY, "step": 1, "foo": 2.0}] @@ -117,7 +117,7 @@ def test_post_to_studio_failed_data_request(tmp_dir, mocker, monkeypatch): live = Live() - scalar_path = os.path.join(live.dir, Scalar.subfolder, "foo.tsv") + scalar_path = os.path.join(live.plots_path, Metric.subfolder, "foo.tsv") error_response = mocker.MagicMock() error_response.status_code = 400 @@ -135,7 +135,7 @@ def test_post_to_studio_failed_data_request(tmp_dir, mocker, monkeypatch): "repo_url": "STUDIO_REPO_URL", "rev": mocker.ANY, "step": 1, - "metrics": {"dvclive.json": {"data": {"step": 1, "foo": 2}}}, + "metrics": {live.metrics_path: {"data": {"step": 1, "foo": 2}}}, "plots": { scalar_path: { "data": [ diff --git a/tests/test_xgboost.py b/tests/test_xgboost.py index 7718cdc9..2978430d 100644 --- a/tests/test_xgboost.py +++ b/tests/test_xgboost.py @@ -7,7 +7,7 @@ from sklearn import datasets from dvclive import Live -from dvclive.utils import parse_scalars +from dvclive.utils import parse_metrics from dvclive.xgb import DvcLiveCallback # pylint: disable=redefined-outer-name, unused-argument @@ -38,7 +38,7 @@ def test_xgb_integration(tmp_dir, train_params, iris_data): assert os.path.exists("dvclive") - logs, _ = parse_scalars(callback.dvclive) + logs, _ = parse_metrics(callback.dvclive) assert len(logs) == 1 assert len(list(logs.values())[0]) == 5