From 9157570d4d09e147e084eb0db2968c57b42f5b42 Mon Sep 17 00:00:00 2001
From: David de la Iglesia Castro <daviddelaiglesiacastro@gmail.com>
Date: Thu, 27 Oct 2022 18:39:18 +0200
Subject: [PATCH] Introduce public `summary`. Remove "no step" / "step" logic
 from plots. (#331)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It was initially introduced for supporting different logging format between step and not step updates.

For `live.log_image`, "step" mode now overwrites the path instead of creating subfolder by step.

For `live.log`, the "no step" was meant to not generate the `.tsv` file but only the `.json`.
Added a public property `summary` so "no step" scenarios can work as follows:

```
live = Live()

live.summary["foo"] = 1
live.make_summary()
```

Closes #326

Apply suggestions from code review

Co-authored-by: Paweł Redzyński <pawelredzynski@gmail.com>

Co-authored-by: Paweł Redzyński <pawelredzynski@gmail.com>
---
 src/dvclive/data/base.py                      | 76 ----------------
 src/dvclive/data/image.py                     | 50 ----------
 src/dvclive/error.py                          |  4 +-
 src/dvclive/live.py                           | 58 +++++-------
 src/dvclive/{data => plots}/__init__.py       | 12 +--
 src/dvclive/plots/base.py                     | 41 +++++++++
 src/dvclive/plots/image.py                    | 31 +++++++
 src/dvclive/{data => plots}/metric.py         | 22 +----
 .../sklearn_plot.py => plots/sklearn.py}      | 71 ++++-----------
 src/dvclive/{data => plots}/utils.py          |  0
 src/dvclive/report.py                         | 10 +-
 src/dvclive/serialize.py                      | 12 ++-
 src/dvclive/utils.py                          |  3 +-
 tests/plots/test_image.py                     | 64 +++++++++++++
 .../test_scalar.py => plots/test_metric.py}   |  4 +-
 .../test_plot.py => plots/test_sklearn.py}    | 31 ++++---
 tests/test_catalyst.py                        |  2 +-
 tests/test_data/test_image.py                 | 91 -------------------
 tests/test_fastai.py                          |  2 +-
 tests/test_huggingface.py                     |  2 +-
 tests/test_keras.py                           |  2 +-
 tests/test_lightning.py                       |  2 +-
 tests/test_main.py                            | 19 +++-
 tests/test_report.py                          | 22 ++---
 tests/test_studio.py                          |  2 +-
 25 files changed, 255 insertions(+), 378 deletions(-)
 delete mode 100644 src/dvclive/data/base.py
 delete mode 100644 src/dvclive/data/image.py
 rename src/dvclive/{data => plots}/__init__.py (60%)
 create mode 100644 src/dvclive/plots/base.py
 create mode 100644 src/dvclive/plots/image.py
 rename src/dvclive/{data => plots}/metric.py (80%)
 rename src/dvclive/{data/sklearn_plot.py => plots/sklearn.py} (66%)
 rename src/dvclive/{data => plots}/utils.py (100%)
 create mode 100644 tests/plots/test_image.py
 rename tests/{test_data/test_scalar.py => plots/test_metric.py} (91%)
 rename tests/{test_data/test_plot.py => plots/test_sklearn.py} (89%)
 delete mode 100644 tests/test_data/test_image.py

diff --git a/src/dvclive/data/base.py b/src/dvclive/data/base.py
deleted file mode 100644
index df0c6fdf..00000000
--- a/src/dvclive/data/base.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import abc
-from pathlib import Path
-from typing import Any, Dict, List, Optional
-
-from dvclive.error import DataAlreadyLoggedError
-
-
-class Data(abc.ABC):
-    def __init__(self, name: str, output_folder: str) -> None:
-        self.name = name
-        self.output_folder: Path = Path(output_folder) / self.subfolder
-        self._step: Optional[int] = None
-        self.val: Optional[List[Any]] = None
-        self._step_none_logged: bool = False
-        self._dump_kwargs: Optional[Dict[str, Any]] = None
-
-    @property
-    def step(self) -> Optional[int]:
-        return self._step
-
-    @step.setter
-    def step(self, val: int) -> None:
-        if self._step_none_logged and val == self._step:
-            raise DataAlreadyLoggedError(self.name, val)
-
-        self._step = val
-
-    @property
-    @abc.abstractmethod
-    def output_path(self) -> Path:
-        pass
-
-    @property
-    @abc.abstractmethod
-    def no_step_output_path(self) -> Path:
-        pass
-
-    @property
-    @abc.abstractmethod
-    def subfolder(self):
-        pass
-
-    @property
-    @abc.abstractmethod
-    def summary(self):
-        pass
-
-    @staticmethod
-    @abc.abstractmethod
-    def could_log(val: object) -> bool:
-        pass
-
-    def dump(self, val: List[Any], step: Optional[int], **kwargs):
-        assert val is not None
-        self.val = val
-        self.step = step
-        self._dump_kwargs = kwargs
-        if not self._step_none_logged and step is None:
-            self._step_none_logged = True
-            self.no_step_dump()
-        elif step == 0:
-            self.first_step_dump()
-        else:
-            self.step_dump()
-
-    @abc.abstractmethod
-    def first_step_dump(self) -> None:
-        pass
-
-    @abc.abstractmethod
-    def no_step_dump(self) -> None:
-        pass
-
-    @abc.abstractmethod
-    def step_dump(self) -> None:
-        pass
diff --git a/src/dvclive/data/image.py b/src/dvclive/data/image.py
deleted file mode 100644
index c2f96948..00000000
--- a/src/dvclive/data/image.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from pathlib import Path
-
-from .base import Data
-
-
-class Image(Data):
-    suffixes = [".jpg", ".jpeg", ".gif", ".png"]
-    subfolder = "images"
-
-    @property
-    def no_step_output_path(self) -> Path:
-        return self.output_folder / self.name
-
-    @property
-    def output_path(self) -> Path:
-        if self._step is None:
-            output_path = self.no_step_output_path
-        else:
-            output_path = self.output_folder / f"{self._step}" / self.name
-        output_path.parent.mkdir(exist_ok=True, parents=True)
-        return output_path
-
-    @staticmethod
-    def could_log(val: object) -> bool:
-        if val.__class__.__module__ == "PIL.Image":
-            return True
-        if val.__class__.__module__ == "numpy":
-            return True
-        return False
-
-    def first_step_dump(self) -> None:
-        if self.no_step_output_path.exists():
-            self.no_step_output_path.rename(self.output_path)
-
-    def no_step_dump(self) -> None:
-        self.step_dump()
-
-    def step_dump(self) -> None:
-        if self.val.__class__.__module__ == "numpy":
-            from PIL import Image as ImagePIL
-
-            _val = ImagePIL.fromarray(self.val)
-        else:
-            _val = self.val
-
-        _val.save(self.output_path)
-
-    @property
-    def summary(self):
-        return {self.name: str(self.output_path)}
diff --git a/src/dvclive/error.py b/src/dvclive/error.py
index 9c863fe3..24c13e14 100644
--- a/src/dvclive/error.py
+++ b/src/dvclive/error.py
@@ -14,12 +14,12 @@ def __init__(self, name, val):
 
 class InvalidPlotTypeError(DvcLiveError):
     def __init__(self, name):
-        from .data import PLOTS
+        from .plots import SKLEARN_PLOTS
 
         self.name = name
         super().__init__(
             f"Plot type '{name}' is not supported."
-            f"\nSupported types are: {list(PLOTS)}"
+            f"\nSupported types are: {list(SKLEARN_PLOTS)}"
         )
 
 
diff --git a/src/dvclive/live.py b/src/dvclive/live.py
index 61377585..133a0a4a 100644
--- a/src/dvclive/live.py
+++ b/src/dvclive/live.py
@@ -2,23 +2,21 @@
 import logging
 import os
 import shutil
-from collections import OrderedDict
-from itertools import chain
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Union
 
 from ruamel.yaml.representer import RepresenterError
 
 from . import env
-from .data import DATA_TYPES, PLOTS, Image, Metric, NumpyEncoder
 from .dvc import make_checkpoint
 from .error import (
     InvalidDataTypeError,
     InvalidParameterTypeError,
     InvalidPlotTypeError,
 )
+from .plots import PLOT_TYPES, SKLEARN_PLOTS, Image, Metric, NumpyEncoder
 from .report import make_report
-from .serialize import dump_yaml, load_yaml
+from .serialize import dump_json, dump_yaml, load_yaml
 from .studio import post_to_studio
 from .utils import (
     env2bool,
@@ -66,11 +64,12 @@ def __init__(
         self.report_mode: Optional[str] = report
         self.report_path = ""
 
+        self.summary: Dict[str, Any] = {}
         self._step: Optional[int] = None
-        self._scalars: Dict[str, Any] = OrderedDict()
-        self._images: Dict[str, Any] = OrderedDict()
-        self._plots: Dict[str, Any] = OrderedDict()
-        self._params: Dict[str, Any] = OrderedDict()
+        self._metrics: Dict[str, Any] = {}
+        self._images: Dict[str, Any] = {}
+        self._plots: Dict[str, Any] = {}
+        self._params: Dict[str, Any] = {}
 
         self._init_paths()
 
@@ -90,7 +89,6 @@ def __init__(
             self._cleanup()
 
         self._latest_studio_step = self.get_step()
-
         if self.report_mode == "studio":
             from scmrepo.git import Git
 
@@ -104,9 +102,9 @@ def __init__(
                 self.report_mode = None
 
     def _cleanup(self):
-        for data_type in DATA_TYPES:
+        for plot_type in PLOT_TYPES:
             shutil.rmtree(
-                Path(self.plots_path) / data_type.subfolder, ignore_errors=True
+                Path(self.plots_path) / plot_type.subfolder, ignore_errors=True
             )
 
         for f in (self.metrics_path, self.report_path, self.params_path):
@@ -138,12 +136,6 @@ def get_step(self) -> int:
     def set_step(self, step: int) -> None:
         if self._step is None:
             self._step = 0
-            for data in chain(
-                self._scalars.values(),
-                self._images.values(),
-                self._plots.values(),
-            ):
-                data.dump(data.val, self._step)
             self.make_summary()
 
         if self.report_mode == "studio":
@@ -165,14 +157,16 @@ def log(self, name: str, val: Union[int, float]):
         if not Metric.could_log(val):
             raise InvalidDataTypeError(name, type(val))
 
-        if name in self._scalars:
-            data = self._scalars[name]
+        if name in self._metrics:
+            data = self._metrics[name]
         else:
             data = Metric(name, self.plots_path)
-            self._scalars[name] = data
+            self._metrics[name] = data
 
-        data.dump(val, self._step)
+        data.step = self.get_step()
+        data.dump(val)
 
+        self.summary = nested_update(self.summary, data.to_summary(val))
         self.make_summary()
         logger.debug(f"Logged {name}: {val}")
 
@@ -186,7 +180,8 @@ def log_image(self, name: str, val):
             data = Image(name, self.plots_path)
             self._images[name] = data
 
-        data.dump(val, self._step)
+        data.step = self.get_step()
+        data.dump(val)
         logger.debug(f"Logged {name}: {val}")
 
     def log_sklearn_plot(self, kind, labels, predictions, name=None, **kwargs):
@@ -195,13 +190,14 @@ def log_sklearn_plot(self, kind, labels, predictions, name=None, **kwargs):
         name = name or kind
         if name in self._plots:
             data = self._plots[name]
-        elif kind in PLOTS and PLOTS[kind].could_log(val):
-            data = PLOTS[kind](name, self.plots_path)
+        elif kind in SKLEARN_PLOTS and SKLEARN_PLOTS[kind].could_log(val):
+            data = SKLEARN_PLOTS[kind](name, self.plots_path)
             self._plots[name] = data
         else:
             raise InvalidPlotTypeError(name)
 
-        data.dump(val, self._step, **kwargs)
+        data.step = self.get_step()
+        data.dump(val, **kwargs)
         logger.debug(f"Logged {name}")
 
     def _read_params(self):
@@ -211,7 +207,7 @@ def _read_params(self):
 
     def _dump_params(self):
         try:
-            dump_yaml(self.params_path, self._params)
+            dump_yaml(self._params, self.params_path)
         except RepresenterError as exc:
             raise InvalidParameterTypeError(exc.args) from exc
 
@@ -230,15 +226,9 @@ def log_param(
         self.log_params({name: val})
 
     def make_summary(self):
-        summary_data = {}
         if self._step is not None:
-            summary_data["step"] = self.get_step()
-
-        for data in self._scalars.values():
-            summary_data = nested_update(summary_data, data.summary)
-
-        with open(self.metrics_path, "w", encoding="utf-8") as f:
-            json.dump(summary_data, f, indent=4, cls=NumpyEncoder)
+            self.summary["step"] = self.get_step()
+        dump_json(self.summary, self.metrics_path, cls=NumpyEncoder)
 
     def make_report(self):
         if self.report_mode is not None:
diff --git a/src/dvclive/data/__init__.py b/src/dvclive/plots/__init__.py
similarity index 60%
rename from src/dvclive/data/__init__.py
rename to src/dvclive/plots/__init__.py
index 0829d446..2b7ec00d 100644
--- a/src/dvclive/data/__init__.py
+++ b/src/dvclive/plots/__init__.py
@@ -1,19 +1,13 @@
 from .image import Image
 from .metric import Metric
-from .sklearn_plot import (
-    Calibration,
-    ConfusionMatrix,
-    Det,
-    PrecisionRecall,
-    Roc,
-)
+from .sklearn import Calibration, ConfusionMatrix, Det, PrecisionRecall, Roc
 from .utils import NumpyEncoder  # noqa: F401
 
-PLOTS = {
+SKLEARN_PLOTS = {
     "calibration": Calibration,
     "confusion_matrix": ConfusionMatrix,
     "det": Det,
     "precision_recall": PrecisionRecall,
     "roc": Roc,
 }
-DATA_TYPES = (*PLOTS.values(), Metric, Image)
+PLOT_TYPES = (*SKLEARN_PLOTS.values(), Metric, Image)
diff --git a/src/dvclive/plots/base.py b/src/dvclive/plots/base.py
new file mode 100644
index 00000000..8473bacb
--- /dev/null
+++ b/src/dvclive/plots/base.py
@@ -0,0 +1,41 @@
+import abc
+from pathlib import Path
+from typing import Optional
+
+from dvclive.error import DataAlreadyLoggedError
+
+
+class Data(abc.ABC):
+    def __init__(self, name: str, output_folder: str) -> None:
+        self.name = name
+        self.output_folder: Path = Path(output_folder) / self.subfolder
+        self._step: Optional[int] = None
+
+    @property
+    def step(self) -> Optional[int]:
+        return self._step
+
+    @step.setter
+    def step(self, val: int) -> None:
+        if val == self._step:
+            raise DataAlreadyLoggedError(self.name, val)
+        self._step = val
+
+    @property
+    @abc.abstractmethod
+    def output_path(self) -> Path:
+        pass
+
+    @property
+    @abc.abstractmethod
+    def subfolder(self):
+        pass
+
+    @staticmethod
+    @abc.abstractmethod
+    def could_log(val) -> bool:
+        pass
+
+    @abc.abstractmethod
+    def dump(self, val, **kwargs):
+        pass
diff --git a/src/dvclive/plots/image.py b/src/dvclive/plots/image.py
new file mode 100644
index 00000000..2e4a14c3
--- /dev/null
+++ b/src/dvclive/plots/image.py
@@ -0,0 +1,31 @@
+from pathlib import Path
+
+from .base import Data
+
+
+class Image(Data):
+    suffixes = [".jpg", ".jpeg", ".gif", ".png"]
+    subfolder = "images"
+
+    @property
+    def output_path(self) -> Path:
+        _path = self.output_folder / self.name
+        _path.parent.mkdir(exist_ok=True, parents=True)
+        return _path
+
+    @staticmethod
+    def could_log(val: object) -> bool:
+        if val.__class__.__module__ == "PIL.Image":
+            return True
+        if val.__class__.__module__ == "numpy":
+            return True
+        return False
+
+    def dump(self, val, **kwargs) -> None:
+        if val.__class__.__module__ == "numpy":
+            from PIL import Image as ImagePIL
+
+            pil_image = ImagePIL.fromarray(val)
+        else:
+            pil_image = val
+        pil_image.save(self.output_path)
diff --git a/src/dvclive/data/metric.py b/src/dvclive/plots/metric.py
similarity index 80%
rename from src/dvclive/data/metric.py
rename to src/dvclive/plots/metric.py
index c1d965f6..2ac5cf16 100644
--- a/src/dvclive/data/metric.py
+++ b/src/dvclive/plots/metric.py
@@ -29,33 +29,19 @@ def output_path(self) -> Path:
         _path.parent.mkdir(exist_ok=True, parents=True)
         return _path
 
-    @property
-    def no_step_output_path(self) -> Path:
-        return self.output_path
-
-    def first_step_dump(self) -> None:
-        self.step_dump()
-
-    def no_step_dump(self) -> None:
-        pass
-
-    def step_dump(self) -> None:
+    def dump(self, val, **kwargs) -> None:
         ts = int(time.time() * 1000)
         d = OrderedDict(
-            [("timestamp", ts), ("step", self.step), (self.name, self.val)]
+            [("timestamp", ts), ("step", self.step), (self.name, val)]
         )
-
         existed = self.output_path.exists()
         with open(self.output_path, "a", encoding="utf-8") as fobj:
             writer = csv.DictWriter(fobj, d.keys(), delimiter="\t")
-
             if not existed:
                 writer.writeheader()
-
             writer.writerow(d)
 
-    @property
-    def summary(self):
+    def to_summary(self, val):
         d = {}
-        nested_set(d, os.path.normpath(self.name).split(os.path.sep), self.val)
+        nested_set(d, os.path.normpath(self.name).split(os.path.sep), val)
         return d
diff --git a/src/dvclive/data/sklearn_plot.py b/src/dvclive/plots/sklearn.py
similarity index 66%
rename from src/dvclive/data/sklearn_plot.py
rename to src/dvclive/plots/sklearn.py
index af24cb13..1e86a24e 100644
--- a/src/dvclive/data/sklearn_plot.py
+++ b/src/dvclive/plots/sklearn.py
@@ -1,6 +1,7 @@
-import json
 from pathlib import Path
 
+from dvclive.serialize import dump_json
+
 from .base import Data
 
 
@@ -21,32 +22,6 @@ def could_log(val: object) -> bool:
             return True
         return False
 
-    @property
-    def no_step_output_path(self) -> Path:
-        return super().no_step_output_path.with_suffix(".json")
-
-    @property
-    def summary(self):
-        return {}
-
-    @staticmethod
-    def write_json(content, output_file):
-        with open(output_file, "w", encoding="utf-8") as f:
-            json.dump(content, f, indent=4)
-
-    def no_step_dump(self) -> None:
-        raise NotImplementedError
-
-    def first_step_dump(self) -> None:
-        raise NotImplementedError(
-            "DVCLive plots can only be used in no-step mode."
-        )
-
-    def step_dump(self) -> None:
-        raise NotImplementedError(
-            "DVCLive plots can only be used in no-step mode."
-        )
-
     @staticmethod
     def get_properties():
         raise NotImplementedError
@@ -63,13 +38,11 @@ def get_properties():
             "y_label": "True Positive Rate",
         }
 
-    def no_step_dump(self) -> None:
-        assert self.val is not None
-
+    def dump(self, val, **kwargs) -> None:
         from sklearn import metrics
 
         fpr, tpr, roc_thresholds = metrics.roc_curve(
-            y_true=self.val[0], y_score=self.val[1], **self._dump_kwargs
+            y_true=val[0], y_score=val[1], **kwargs
         )
         roc = {
             "roc": [
@@ -77,7 +50,7 @@ def no_step_dump(self) -> None:
                 for fp, tp, t in zip(fpr, tpr, roc_thresholds)
             ]
         }
-        self.write_json(roc, self.output_path)
+        dump_json(roc, self.output_path)
 
 
 class PrecisionRecall(SKLearnPlot):
@@ -91,13 +64,11 @@ def get_properties():
             "y_label": "Precision",
         }
 
-    def no_step_dump(self) -> None:
-        assert self.val is not None
-
+    def dump(self, val, **kwargs) -> None:
         from sklearn import metrics
 
         precision, recall, prc_thresholds = metrics.precision_recall_curve(
-            y_true=self.val[0], probas_pred=self.val[1], **self._dump_kwargs
+            y_true=val[0], probas_pred=val[1], **kwargs
         )
 
         prc = {
@@ -106,7 +77,7 @@ def no_step_dump(self) -> None:
                 for p, r, t in zip(precision, recall, prc_thresholds)
             ]
         }
-        self.write_json(prc, self.output_path)
+        dump_json(prc, self.output_path)
 
 
 class Det(SKLearnPlot):
@@ -120,13 +91,11 @@ def get_properties():
             "y_label": "False Negative Rate",
         }
 
-    def no_step_dump(self) -> None:
-        assert self.val is not None
-
+    def dump(self, val, **kwargs) -> None:
         from sklearn import metrics
 
         fpr, fnr, roc_thresholds = metrics.det_curve(
-            y_true=self.val[0], y_score=self.val[1], **self._dump_kwargs
+            y_true=val[0], y_score=val[1], **kwargs
         )
 
         det = {
@@ -135,7 +104,7 @@ def no_step_dump(self) -> None:
                 for fp, fn, t in zip(fpr, fnr, roc_thresholds)
             ]
         }
-        self.write_json(det, self.output_path)
+        dump_json(det, self.output_path)
 
 
 class ConfusionMatrix(SKLearnPlot):
@@ -150,14 +119,12 @@ def get_properties():
             "y_label": "Predicted Label",
         }
 
-    def no_step_dump(self) -> None:
-        assert self.val is not None
-
+    def dump(self, val, **kwargs) -> None:
         cm = [
             {"actual": str(actual), "predicted": str(predicted)}
-            for actual, predicted in zip(self.val[0], self.val[1])
+            for actual, predicted in zip(val[0], val[1])
         ]
-        self.write_json(cm, self.output_path)
+        dump_json(cm, self.output_path)
 
 
 class Calibration(SKLearnPlot):
@@ -171,19 +138,17 @@ def get_properties():
             "y_label": "Fraction of Positives",
         }
 
-    def no_step_dump(self) -> None:
-        assert self.val is not None
-
+    def dump(self, val, **kwargs) -> None:
         from sklearn import calibration
 
         prob_true, prob_pred = calibration.calibration_curve(
-            y_true=self.val[0], y_prob=self.val[1], **self._dump_kwargs
+            y_true=val[0], y_prob=val[1], **kwargs
         )
 
-        calibration = {
+        _calibration = {
             "calibration": [
                 {"prob_true": pt, "prob_pred": pp}
                 for pt, pp in zip(prob_true, prob_pred)
             ]
         }
-        self.write_json(calibration, self.output_path)
+        dump_json(_calibration, self.output_path)
diff --git a/src/dvclive/data/utils.py b/src/dvclive/plots/utils.py
similarity index 100%
rename from src/dvclive/data/utils.py
rename to src/dvclive/plots/utils.py
diff --git a/src/dvclive/report.py b/src/dvclive/report.py
index 54f3cc7e..0082dba9 100644
--- a/src/dvclive/report.py
+++ b/src/dvclive/report.py
@@ -8,8 +8,8 @@
 from dvc_render.table import TableRenderer
 from dvc_render.vega import VegaRenderer
 
-from dvclive.data import PLOTS, Image, Metric
-from dvclive.data.sklearn_plot import SKLearnPlot
+from dvclive.plots import SKLEARN_PLOTS, Image, Metric
+from dvclive.plots.sklearn import SKLearnPlot
 from dvclive.serialize import load_yaml
 from dvclive.utils import parse_tsv
 
@@ -38,12 +38,12 @@ def get_scalar_renderers(metrics_path):
 
 
 def get_image_renderers(images_folder):
-    dvclive_path = images_folder.parent
+    plots_path = images_folder.parent.parent
     renderers = []
     for suffix in Image.suffixes:
         all_images = Path(images_folder).rglob(f"*{suffix}")
         for file in sorted(all_images):
-            src = str(file.relative_to(dvclive_path))
+            src = str(file.relative_to(plots_path))
             name = str(file.relative_to(images_folder))
             data = [
                 {
@@ -65,7 +65,7 @@ def get_plot_renderers(plots_folder):
                 data = data[name]
             for row in data:
                 row["rev"] = "workspace"
-            properties = PLOTS[name].get_properties()
+            properties = SKLEARN_PLOTS[name].get_properties()
             renderers.append(VegaRenderer(data, name, **properties))
     return renderers
 
diff --git a/src/dvclive/serialize.py b/src/dvclive/serialize.py
index 3a286e5a..019923c4 100644
--- a/src/dvclive/serialize.py
+++ b/src/dvclive/serialize.py
@@ -1,3 +1,4 @@
+import json
 from collections import OrderedDict
 
 from dvclive.error import DvcLiveError
@@ -36,7 +37,12 @@ def _get_yaml():
     return yaml
 
 
-def dump_yaml(path, data):
+def dump_yaml(content, output_file):
     yaml = _get_yaml()
-    with open(path, "w", encoding="utf-8") as fd:
-        yaml.dump(data, fd)
+    with open(output_file, "w", encoding="utf-8") as fd:
+        yaml.dump(content, fd)
+
+
+def dump_json(content, output_file, indent=4, **kwargs):
+    with open(output_file, "w", encoding="utf-8") as f:
+        json.dump(content, f, indent=indent, **kwargs)
diff --git a/src/dvclive/utils.py b/src/dvclive/utils.py
index 73ebc23e..0ddecb83 100644
--- a/src/dvclive/utils.py
+++ b/src/dvclive/utils.py
@@ -23,6 +23,7 @@ def nested_set(d, keys, value):
     for key in keys[:-1]:
         d = d.setdefault(key, {})
     d[keys[-1]] = value
+    return d
 
 
 def nested_update(d, u):
@@ -108,7 +109,7 @@ def parse_json(path):
 
 
 def parse_metrics(live):
-    from .data import Metric
+    from .plots import Metric
 
     plots_path = Path(live.plots_path)
     history = {}
diff --git a/tests/plots/test_image.py b/tests/plots/test_image.py
new file mode 100644
index 00000000..310fd127
--- /dev/null
+++ b/tests/plots/test_image.py
@@ -0,0 +1,64 @@
+import numpy as np
+import pytest
+from PIL import Image
+
+# pylint: disable=unused-argument
+from dvclive import Live
+from dvclive.plots import Image as LiveImage
+
+
+def test_PIL(tmp_dir):
+    live = Live()
+    img = Image.new("RGB", (10, 10), (250, 250, 250))
+    live.log_image("image.png", img)
+
+    assert (
+        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
+    ).exists()
+
+
+def test_invalid_extension(tmp_dir):
+    live = Live()
+    img = Image.new("RGB", (10, 10), (250, 250, 250))
+    with pytest.raises(ValueError):
+        live.log_image("image.foo", img)
+
+
+@pytest.mark.parametrize("shape", [(10, 10), (10, 10, 3), (10, 10, 4)])
+def test_numpy(tmp_dir, shape):
+    live = Live()
+    img = np.ones(shape, np.uint8) * 255
+    live.log_image("image.png", img)
+
+    assert (
+        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
+    ).exists()
+
+
+def test_override_on_step(tmp_dir):
+    live = Live()
+
+    zeros = np.zeros((2, 2, 3), np.uint8)
+    live.log_image("image.png", zeros)
+
+    live.next_step()
+
+    ones = np.ones((2, 2, 3), np.uint8)
+    live.log_image("image.png", ones)
+
+    img_path = tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
+    assert np.array_equal(np.array(Image.open(img_path)), ones)
+
+
+def test_cleanup(tmp_dir):
+    live = Live()
+    img = np.ones((10, 10, 3), np.uint8)
+    live.log_image("image.png", img)
+
+    assert (
+        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
+    ).exists()
+
+    Live()
+
+    assert not (tmp_dir / live.plots_path / LiveImage.subfolder).exists()
diff --git a/tests/test_data/test_scalar.py b/tests/plots/test_metric.py
similarity index 91%
rename from tests/test_data/test_scalar.py
rename to tests/plots/test_metric.py
index f585e91c..063882d3 100644
--- a/tests/test_data/test_scalar.py
+++ b/tests/plots/test_metric.py
@@ -5,8 +5,8 @@
 
 # pylint: disable=unused-argument
 from dvclive import Live
-from dvclive.data.metric import Metric
-from dvclive.data.utils import NUMPY_INTS, NUMPY_SCALARS
+from dvclive.plots.metric import Metric
+from dvclive.plots.utils import NUMPY_INTS, NUMPY_SCALARS
 from dvclive.utils import parse_tsv
 
 
diff --git a/tests/test_data/test_plot.py b/tests/plots/test_sklearn.py
similarity index 89%
rename from tests/test_data/test_plot.py
rename to tests/plots/test_sklearn.py
index 93ce9635..085916b9 100644
--- a/tests/test_data/test_plot.py
+++ b/tests/plots/test_sklearn.py
@@ -4,7 +4,7 @@
 from sklearn import calibration, metrics
 
 from dvclive import Live
-from dvclive.data.sklearn_plot import SKLearnPlot
+from dvclive.plots.sklearn import SKLearnPlot
 
 # pylint: disable=redefined-outer-name, unused-argument
 
@@ -99,19 +99,6 @@ def test_log_confusion_matrix(tmp_dir, y_true_y_pred_y_score, mocker):
     assert cm[0]["predicted"] == str(y_pred[0])
 
 
-def test_step_exception(tmp_dir, y_true_y_pred_y_score):
-    live = Live()
-    out = tmp_dir / live.plots_path / SKLearnPlot.subfolder
-
-    y_true, y_pred, _ = y_true_y_pred_y_score
-
-    live.log_sklearn_plot("confusion_matrix", y_true, y_pred)
-    assert (out / "confusion_matrix.json").exists()
-
-    with pytest.raises(NotImplementedError):
-        live.next_step()
-
-
 def test_dump_kwargs(tmp_dir, y_true_y_pred_y_score, mocker):
     live = Live()
 
@@ -124,6 +111,22 @@ def test_dump_kwargs(tmp_dir, y_true_y_pred_y_score, mocker):
     spy.assert_called_once_with(y_true, y_score, drop_intermediate=True)
 
 
+def test_override_on_step(tmp_dir):
+    live = Live()
+
+    live.log_sklearn_plot("confusion_matrix", [0, 0], [0, 0])
+    live.next_step()
+    live.log_sklearn_plot("confusion_matrix", [0, 0], [1, 1])
+
+    plot_path = tmp_dir / live.plots_path / SKLearnPlot.subfolder
+    plot_path = plot_path / "confusion_matrix.json"
+
+    assert json.loads(plot_path.read_text()) == [
+        {"actual": "0", "predicted": "1"},
+        {"actual": "0", "predicted": "1"},
+    ]
+
+
 def test_cleanup(tmp_dir, y_true_y_pred_y_score):
     live = Live()
     out = tmp_dir / live.plots_path / SKLearnPlot.subfolder
diff --git a/tests/test_catalyst.py b/tests/test_catalyst.py
index a3bb4aac..b4703b83 100644
--- a/tests/test_catalyst.py
+++ b/tests/test_catalyst.py
@@ -7,7 +7,7 @@
 
 from dvclive import Live
 from dvclive.catalyst import DvcLiveCallback
-from dvclive.data import Metric
+from dvclive.plots import Metric
 
 # pylint: disable=redefined-outer-name, unused-argument
 
diff --git a/tests/test_data/test_image.py b/tests/test_data/test_image.py
deleted file mode 100644
index 093060b4..00000000
--- a/tests/test_data/test_image.py
+++ /dev/null
@@ -1,91 +0,0 @@
-import numpy as np
-import pytest
-from PIL import Image
-
-# pylint: disable=unused-argument
-from dvclive import Live
-from dvclive.data import Image as LiveImage
-
-
-def test_PIL(tmp_dir):
-    live = Live()
-    img = Image.new("RGB", (500, 500), (250, 250, 250))
-    live.log_image("image.png", img)
-
-    assert (
-        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
-    ).exists()
-
-
-def test_invalid_extension(tmp_dir):
-    live = Live()
-    img = Image.new("RGB", (500, 500), (250, 250, 250))
-    with pytest.raises(ValueError):
-        live.log_image("image.foo", img)
-
-
-@pytest.mark.parametrize("shape", [(500, 500), (500, 500, 3), (500, 500, 4)])
-def test_numpy(tmp_dir, shape):
-    live = Live()
-    img = np.ones(shape, np.uint8) * 255
-    live.log_image("image.png", img)
-
-    assert (
-        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
-    ).exists()
-
-
-def test_step_formatting(tmp_dir):
-    live = Live()
-    img = np.ones((500, 500, 3), np.uint8)
-    for _ in range(3):
-        live.log_image("image.png", img)
-        live.next_step()
-
-    for step in range(3):
-        assert (
-            tmp_dir
-            / live.plots_path
-            / LiveImage.subfolder
-            / str(step)
-            / "image.png"
-        ).exists()
-
-
-def test_step_rename(tmp_dir, mocker):
-    from pathlib import Path
-
-    rename = mocker.spy(Path, "rename")
-    live = Live()
-    img = np.ones((500, 500, 3), np.uint8)
-    live.log_image("image.png", img)
-    assert (
-        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
-    ).exists()
-
-    live.next_step()
-
-    assert not (
-        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
-    ).exists()
-    assert (
-        tmp_dir / live.plots_path / LiveImage.subfolder / "0" / "image.png"
-    ).exists()
-    rename.assert_called_once_with(
-        Path(live.plots_path) / LiveImage.subfolder / "image.png",
-        Path(live.plots_path) / LiveImage.subfolder / "0" / "image.png",
-    )
-
-
-def test_cleanup(tmp_dir):
-    live = Live()
-    img = np.ones((500, 500, 3), np.uint8)
-    live.log_image("image.png", img)
-
-    assert (
-        tmp_dir / live.plots_path / LiveImage.subfolder / "image.png"
-    ).exists()
-
-    Live()
-
-    assert not (tmp_dir / live.plots_path / LiveImage.subfolder).exists()
diff --git a/tests/test_fastai.py b/tests/test_fastai.py
index f7cacf83..7eb0c6ce 100644
--- a/tests/test_fastai.py
+++ b/tests/test_fastai.py
@@ -11,8 +11,8 @@
 )
 
 from dvclive import Live
-from dvclive.data.metric import Metric
 from dvclive.fastai import DvcLiveCallback
+from dvclive.plots.metric import Metric
 
 # pylint: disable=redefined-outer-name, unused-argument
 
diff --git a/tests/test_huggingface.py b/tests/test_huggingface.py
index 2e1ce141..1ffae80e 100644
--- a/tests/test_huggingface.py
+++ b/tests/test_huggingface.py
@@ -12,8 +12,8 @@
 )
 
 from dvclive import Live
-from dvclive.data.metric import Metric
 from dvclive.huggingface import DvcLiveCallback
+from dvclive.plots.metric import Metric
 from dvclive.utils import parse_metrics
 
 # pylint: disable=redefined-outer-name, unused-argument, no-value-for-parameter
diff --git a/tests/test_keras.py b/tests/test_keras.py
index 33c9fe75..32b11bc8 100644
--- a/tests/test_keras.py
+++ b/tests/test_keras.py
@@ -3,8 +3,8 @@
 import pytest
 
 from dvclive import Live
-from dvclive.data.metric import Metric
 from dvclive.keras import DvcLiveCallback
+from dvclive.plots.metric import Metric
 from dvclive.utils import parse_metrics
 
 # pylint: disable=unused-argument, no-name-in-module, redefined-outer-name
diff --git a/tests/test_lightning.py b/tests/test_lightning.py
index 52dd6b22..d24ca05d 100644
--- a/tests/test_lightning.py
+++ b/tests/test_lightning.py
@@ -8,8 +8,8 @@
 from torch.optim import Adam
 from torch.utils.data import DataLoader, Dataset
 
-from dvclive.data.metric import Metric
 from dvclive.lightning import DvcLiveLogger
+from dvclive.plots.metric import Metric
 from dvclive.utils import parse_metrics
 
 # pylint: disable=redefined-outer-name, unused-argument
diff --git a/tests/test_main.py b/tests/test_main.py
index 80620837..05ee21a7 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -1,17 +1,18 @@
 # pylint: disable=protected-access
 # pylint: disable=unused-argument
+import json
 import os
 
 import pytest
 from funcy import last
 
 from dvclive import Live, env
-from dvclive.data import Metric
 from dvclive.error import (
     DataAlreadyLoggedError,
     InvalidDataTypeError,
     InvalidParameterTypeError,
 )
+from dvclive.plots import Metric
 from dvclive.serialize import load_yaml
 from dvclive.utils import parse_metrics
 
@@ -239,7 +240,7 @@ def test_require_step_update(tmp_dir, metric):
 
     with pytest.raises(
         DataAlreadyLoggedError,
-        match="has already been logged with step 'None'",
+        match="has already been logged with step '0'",
     ):
         dvclive.log(metric, 2.0)
 
@@ -340,3 +341,17 @@ def test_logger(tmp_dir, mocker, monkeypatch):
 
     live = Live(resume=True)
     logger.info.assert_called_with("Resumed from step 0")
+
+
+def test_make_summary_without_calling_log(tmp_dir):
+    dvclive = Live()
+
+    dvclive.summary["foo"] = 1.0
+    dvclive.make_summary()
+
+    assert json.loads((tmp_dir / dvclive.metrics_path).read_text()) == {
+        # no `step`
+        "foo": 1.0
+    }
+    log_file = tmp_dir / dvclive.plots_path / Metric.subfolder / "foo.tsv"
+    assert not log_file.exists()
diff --git a/tests/test_report.py b/tests/test_report.py
index 8ed4abe5..01a235e7 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -5,10 +5,10 @@
 from PIL import Image
 
 from dvclive import Live
-from dvclive.data import Image as LiveImage
-from dvclive.data import Metric
-from dvclive.data.sklearn_plot import ConfusionMatrix, SKLearnPlot
 from dvclive.env import DVCLIVE_OPEN
+from dvclive.plots import Image as LiveImage
+from dvclive.plots import Metric
+from dvclive.plots.sklearn import ConfusionMatrix, SKLearnPlot
 from dvclive.report import (
     get_image_renderers,
     get_metrics_renderers,
@@ -30,20 +30,18 @@ def test_get_renderers(tmp_dir, mocker):
         live.log_image("image.png", img)
         live.next_step()
 
-    live.set_step(None)
     live.log_sklearn_plot("confusion_matrix", [0, 0, 1, 1], [1, 0, 0, 1])
 
     image_renderers = get_image_renderers(
         tmp_dir / live.plots_path / LiveImage.subfolder
     )
-    assert len(image_renderers) == 2
-    image_renderers = sorted(
-        image_renderers, key=lambda x: x.datapoints[0]["rev"]
-    )
-    for n, renderer in enumerate(image_renderers):
-        assert renderer.datapoints == [
-            {"src": mocker.ANY, "rev": os.path.join(str(n), "image.png")}
-        ]
+    assert len(image_renderers) == 1
+    assert image_renderers[0].datapoints == [
+        {
+            "src": os.path.join("plots", LiveImage.subfolder, "image.png"),
+            "rev": "image.png",
+        }
+    ]
 
     scalar_renderers = get_scalar_renderers(
         tmp_dir / live.plots_path / Metric.subfolder
diff --git a/tests/test_studio.py b/tests/test_studio.py
index 4c18deab..44c082f3 100644
--- a/tests/test_studio.py
+++ b/tests/test_studio.py
@@ -5,7 +5,7 @@
 import pytest
 
 from dvclive import Live, env
-from dvclive.data import Metric
+from dvclive.plots import Metric
 
 
 @pytest.mark.studio