Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Image support #166

Merged
merged 36 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
52f99ec
Added image_pil and image_numpy
daavoo Sep 17, 2021
98803a2
Use DATA_TYPES list in metrics
daavoo Sep 17, 2021
4cf1df5
Use subdir structure
daavoo Sep 17, 2021
60f944d
Use data subdirs in init_path
daavoo Sep 17, 2021
062c8a2
Fix test_logging
daavoo Sep 20, 2021
51f67f2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 20, 2021
ac22e98
Fix tests
daavoo Sep 20, 2021
23078f5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 20, 2021
4fa57e0
Added test_image
daavoo Sep 20, 2021
979c55d
pre-commit
daavoo Sep 20, 2021
c911db2
Fix catalyst and fastai
daavoo Sep 20, 2021
01d1010
Make pillow optional dep
daavoo Sep 21, 2021
440f8a9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 21, 2021
cb2962e
Renamed scalar -> scalars
daavoo Sep 21, 2021
811b8c9
Merge branch 'image-support' of https://github.com/iterative/dvclive …
daavoo Sep 21, 2021
c2bda79
Raise exception
daavoo Sep 21, 2021
79f99a0
Fix pylint
daavoo Sep 21, 2021
9280015
Old summary
daavoo Sep 22, 2021
523d135
Removed subdirs
daavoo Sep 30, 2021
404beaa
Add image summary
daavoo Sep 30, 2021
48c8f9c
Fix test subdirs
daavoo Sep 30, 2021
5441e44
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 30, 2021
7295c9f
Include step in image summary
daavoo Sep 30, 2021
c1fccee
Merge branch 'image-support' of https://github.com/iterative/dvclive …
daavoo Sep 30, 2021
2b9cc80
lint
daavoo Sep 30, 2021
af2c270
Raise Error on lazy PIL import
daavoo Sep 30, 2021
720890c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Sep 30, 2021
5ca8323
Merge branch 'master' into image-support
daavoo Oct 4, 2021
18052a9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 4, 2021
5751203
Fix setup
daavoo Oct 4, 2021
dc6802c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 4, 2021
63fb269
Merge branch 'master' into image-support
daavoo Oct 27, 2021
91ba34e
Fixed merge
daavoo Oct 27, 2021
2b90ab0
Merge branch 'master' into image-support
daavoo Nov 8, 2021
1cb1883
Fixed tests
daavoo Nov 8, 2021
69beabc
Fixed step formatting
daavoo Nov 8, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion dvclive/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
from .scalar import Scalar # noqa: F401
from .image_numpy import ImageNumpy
from .image_pil import ImagePIL
from .scalar import Scalar

DATA_TYPES = [ImageNumpy, ImagePIL, Scalar]
19 changes: 19 additions & 0 deletions dvclive/data/image_numpy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from .image_pil import ImagePIL


class ImageNumpy(ImagePIL):
    """Image data type for numpy arrays, saved through ``ImagePIL``."""

    @staticmethod
    def could_log(val: object) -> bool:
        """Return True when *val* is a ``numpy.ndarray`` (or a subclass).

        The check is done on module and class names so numpy does not have
        to be imported here. Walking the MRO accepts ``ndarray`` subclasses
        that are defined in other modules, while numpy scalars (for example
        ``numpy.float32``), which also live in the ``numpy`` module but
        cannot be converted to an image, are rejected.
        """
        return any(
            (cls.__module__, cls.__name__) == ("numpy", "ndarray")
            for cls in type(val).__mro__
        )

    def dump(self, val, step) -> None:
        """Convert the array *val* to a PIL image and delegate to the parent.

        Raises whatever ``PIL.Image.fromarray`` raises when the array cannot
        be interpreted as an image.
        """
        # Imported lazily so that PIL is only required when an image is
        # actually dumped.
        from PIL import Image

        super().dump(Image.fromarray(val), step)

    @property
    def summary(self):
        """Return an empty dict: this data type adds nothing to the summary."""
        return {}
34 changes: 34 additions & 0 deletions dvclive/data/image_pil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from pathlib import Path

from .base import Data


class ImagePIL(Data):
    """Data type that saves PIL images below the ``images`` subdirectory.

    ``self.name`` and ``self.output_folder`` are read here but defined
    elsewhere (presumably by the ``Data`` base class -- confirm in base.py).
    """

    # Folder, relative to the output folder, where images are written.
    subdir = "images"
    # Suffixes a logged image name is allowed to end with.
    suffixes = [".jpg", ".jpeg", ".gif", ".png"]

    @staticmethod
    def could_log(val: object) -> bool:
        """Return True when *val* is an instance of a ``PIL.Image`` class.

        The whole MRO is inspected, not only the concrete class: images
        returned by ``PIL.Image.open`` are instances of plugin classes
        (e.g. ``PIL.PngImagePlugin.PngImageFile``) that merely inherit from
        ``PIL.Image.Image``, so comparing the concrete class's module alone
        would reject them. Only module names are compared, so PIL does not
        need to be imported here.
        """
        return any(
            cls.__module__ == "PIL.Image" for cls in type(val).__mro__
        )

    @property
    def output_path(self) -> Path:
        """Return ``output_folder / subdir / name``.

        Raises:
            ValueError: if the suffix of ``name`` is not in ``suffixes``.
        """
        suffix = Path(self.name).suffix
        if suffix not in self.suffixes:
            raise ValueError(
                f"Invalid image suffix '{suffix}'"
                f" Must be one of {self.suffixes}"
            )
        return self.output_folder / self.subdir / self.name

    def dump(self, val, step) -> None:
        """Save the image *val*, creating parent folders as needed.

        A ``{step}`` placeholder in the output path is replaced by *step*,
        so the name may place the step in the file name or in a folder.
        """
        super().dump(val, step)
        # Resolve the placeholder before creating folders: it may appear in
        # a directory component of the path.
        output_path = Path(str(self.output_path).format(step=step))
        output_path.parent.mkdir(exist_ok=True, parents=True)

        val.save(output_path)

    @property
    def summary(self):
        """Return an empty dict: images add nothing to the summary."""
        return {}
daavoo marked this conversation as resolved.
Show resolved Hide resolved
5 changes: 4 additions & 1 deletion dvclive/data/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@


class Scalar(Data):
subdir = "scalars"
suffixes = [".csv", ".tsv"]

@staticmethod
def could_log(val: object) -> bool:
if isinstance(val, (int, float)):
Expand All @@ -18,7 +21,7 @@ def could_log(val: object) -> bool:

@property
def output_path(self) -> Path:
_path = self.output_folder / self.name
_path = self.output_folder / self.subdir / self.name
_path.parent.mkdir(exist_ok=True, parents=True)
return _path.with_suffix(".tsv")

Expand Down
34 changes: 24 additions & 10 deletions dvclive/metrics.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import json
import logging
import os
import shutil
from collections import OrderedDict
from pathlib import Path
from typing import Any, Dict, Union

from .data import Scalar
from .data import DATA_TYPES
from .dvc import make_checkpoint, make_html
from .error import InvalidDataTypeError

Expand Down Expand Up @@ -42,19 +43,29 @@ def __init__(

def _cleanup(self):

for dvclive_file in Path(self.dir).rglob("*.tsv"):
dvclive_file.unlink()
for data_type in DATA_TYPES:
subdir = Path(self.dir) / data_type.subdir
for suffix in data_type.suffixes:
for data_file in subdir.rglob(f"*{suffix}"):
data_file.unlink()

if os.path.exists(self.summary_path):
os.remove(self.summary_path)

if os.path.exists(self.html_path):
os.remove(self.html_path)
shutil.rmtree(self.html_path, ignore_errors=True)

def _init_paths(self):
os.makedirs(self.dir, exist_ok=True)
for data_type in DATA_TYPES:
os.makedirs(
os.path.join(self.dir, data_type.subdir), exist_ok=True
)

if self._summary:
self.make_summary()
if self._html:
os.makedirs(self.html_path, exist_ok=True)

@staticmethod
def from_env():
Expand Down Expand Up @@ -92,11 +103,11 @@ def exists(self):

@property
def summary_path(self):
return self.dir + ".json"
return os.path.join(self.dir, "summary.json")

@property
def html_path(self):
return self.dir + "_dvc_plots/index.html"
return os.path.join(self.dir, "html")

def get_step(self) -> int:
return self._step
Expand All @@ -115,15 +126,18 @@ def next_step(self):

def log(self, name: str, val: Union[int, float]):

data = None
if name in self._data:
data = self._data[name]
elif Scalar.could_log(val):
data = Scalar(name, self.dir)
self._data[name] = data
else:
for data_type in DATA_TYPES:
if data_type.could_log(val):
data = data_type(name, self.dir)
self._data[name] = data
if data is None:
raise InvalidDataTypeError(name, type(val))

data.dump(val, self._step)

if self._summary:
self.make_summary()

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,9 @@ def run(self):
hugginface = ["transformers", "datasets"]
catalyst = ["catalyst"]
fastai = ["fastai"]
image = ["pillow"]
daavoo marked this conversation as resolved.
Show resolved Hide resolved

all_libs = mmcv + tf + xgb + lgbm + hugginface + catalyst + fastai
all_libs = mmcv + tf + xgb + lgbm + hugginface + catalyst + fastai + image

tests_requires = [
"pylint==2.5.3",
Expand Down
5 changes: 3 additions & 2 deletions tests/test_catalyst.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import dvclive
from dvclive.catalyst import DvcLiveCallback
from dvclive.data import Scalar

# pylint: disable=redefined-outer-name, unused-argument

Expand Down Expand Up @@ -66,8 +67,8 @@ def test_catalyst_callback(tmp_dir, runner, loaders):

assert os.path.exists("dvc_logs")

train_path = tmp_dir / "dvc_logs/train"
valid_path = tmp_dir / "dvc_logs/valid"
train_path = tmp_dir / "dvc_logs" / Scalar.subdir / "train"
valid_path = tmp_dir / "dvc_logs" / Scalar.subdir / "valid"

assert train_path.is_dir()
assert valid_path.is_dir()
Expand Down
49 changes: 49 additions & 0 deletions tests/test_data/test_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import os

import numpy as np
import pytest
from PIL import Image

# pylint: disable=unused-argument
import dvclive
from dvclive.data import ImageNumpy, ImagePIL


def test_PIL(tmp_dir):
    """A PIL image logged as ``image.png`` ends up in the images subdir."""
    live = dvclive.init()
    picture = Image.new("RGB", (500, 500), (250, 250, 250))
    dvclive.log("image.png", picture)

    saved_file = tmp_dir / live.dir / ImagePIL.subdir / "image.png"
    assert saved_file.exists()


def test_invalid_extension(tmp_dir):
    """Logging an image under an unsupported suffix raises ValueError."""
    dvclive.init()
    picture = Image.new(mode="RGB", size=(500, 500), color=(250, 250, 250))
    with pytest.raises(ValueError):
        dvclive.log("image.foo", picture)


@pytest.mark.parametrize("shape", [(500, 500), (500, 500, 3), (500, 500, 4)])
def test_numpy(tmp_dir, shape):
    """A uint8 array of each parametrized shape is saved as ``image.png``."""
    live = dvclive.init()
    pixels = np.full(shape, 255, dtype=np.uint8)
    dvclive.log("image.png", pixels)

    saved_file = tmp_dir / live.dir / ImageNumpy.subdir / "image.png"
    assert saved_file.exists()


@pytest.mark.parametrize(
    "pattern", ["image_{step}.png", str(os.path.join("{step}", "image.png"))]
)
def test_step_formatting(tmp_dir, pattern):
    """``{step}`` in the name is expanded for each logged step.

    The placeholder is exercised both in the file name and in a folder name.
    """
    n_steps = 3
    live = dvclive.init()
    pixels = np.ones((500, 500, 3), np.uint8)
    for _ in range(n_steps):
        dvclive.log(pattern, pixels)
        dvclive.next_step()

    images_dir = tmp_dir / live.dir / ImagePIL.subdir
    for step in range(n_steps):
        expected_file = images_dir / pattern.format(step=step)
        assert expected_file.exists()
7 changes: 4 additions & 3 deletions tests/test_fastai.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)

import dvclive
from dvclive.data import Scalar
from dvclive.fastai import DvcLiveCallback

# pylint: disable=redefined-outer-name, unused-argument
Expand Down Expand Up @@ -49,12 +50,12 @@ def test_fastai_callback(tmp_dir, data_loader):

assert os.path.exists("dvc_logs")

train_path = tmp_dir / "dvc_logs/train"
valid_path = tmp_dir / "dvc_logs/valid"
train_path = tmp_dir / "dvc_logs" / Scalar.subdir / "train"
valid_path = tmp_dir / "dvc_logs" / Scalar.subdir / "valid"

assert train_path.is_dir()
assert valid_path.is_dir()
assert (tmp_dir / "dvc_logs/accuracy.tsv").exists()
assert (tmp_dir / "dvc_logs" / Scalar.subdir / "accuracy.tsv").exists()


def test_fastai_model_file(tmp_dir, data_loader):
Expand Down
44 changes: 24 additions & 20 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from dvclive import env

# pylint: disable=unused-argument
from dvclive.data import DATA_TYPES, Scalar
from dvclive.dvc import SIGNAL_FILE
from dvclive.error import (
ConfigMismatchError,
Expand All @@ -20,13 +21,16 @@


def read_logs(path: str):
assert os.path.isdir(path)
path = Path(path)
assert path.is_dir()
history = {}
for metric_file in Path(path).rglob("*.tsv"):
metric_name = str(metric_file).replace(path + os.path.sep, "")
for metric_file in (path / Scalar.subdir).rglob("*.tsv"):
metric_name = str(metric_file).replace(
str(path / Scalar.subdir) + os.path.sep, ""
)
metric_name = metric_name.replace(".tsv", "")
history[metric_name] = _parse_tsv(metric_file)
latest = _parse_json(path + ".json")
latest = _parse_json(os.path.join(path, "summary.json"))
return history, latest


Expand Down Expand Up @@ -57,21 +61,22 @@ def _parse_json(path):


@pytest.mark.parametrize("path", ["logs", os.path.join("subdir", "logs")])
def test_create_logs_dir(tmp_dir, path):
def test_init_paths(tmp_dir, path):
dvclive.init(path)

assert (tmp_dir / path).is_dir()
for data_type in DATA_TYPES:
assert (tmp_dir / path / data_type.subdir).is_dir()


@pytest.mark.parametrize("summary", [True, False])
def test_logging(tmp_dir, summary):
dvclive.init("logs", summary=summary)
logger = dvclive.init("logs", summary=summary)

dvclive.log("m1", 1)

assert (tmp_dir / "logs").is_dir()
assert (tmp_dir / "logs" / "m1.tsv").is_file()
assert (tmp_dir / "logs.json").is_file() == summary
assert (tmp_dir / "logs" / Scalar.subdir / "m1.tsv").is_file()
assert (tmp_dir / logger.summary_path).is_file() == summary

if summary:
_, s = read_logs("logs")
Expand All @@ -84,11 +89,11 @@ def test_nested_logging(tmp_dir):
dvclive.log("train/m1", 1)
dvclive.log("val/val_1/m1", 1)

assert (tmp_dir / "logs").is_dir()
assert (tmp_dir / "logs" / "train").is_dir()
assert (tmp_dir / "logs" / "val" / "val_1").is_dir()
assert (tmp_dir / "logs" / "train" / "m1.tsv").is_file()
assert (tmp_dir / "logs" / "val" / "val_1" / "m1.tsv").is_file()
assert (tmp_dir / "logs" / Scalar.subdir / "val" / "val_1").is_dir()
assert (tmp_dir / "logs" / Scalar.subdir / "train" / "m1.tsv").is_file()
assert (
tmp_dir / "logs" / Scalar.subdir / "val" / "val_1" / "m1.tsv"
).is_file()

_, summary = read_logs("logs")

Expand Down Expand Up @@ -127,22 +132,21 @@ def test_cleanup(tmp_dir, summary, html):
logger = dvclive.init("logs", summary=summary)
dvclive.log("m1", 1)

html_path = tmp_dir / logger.html_path
html_path = tmp_dir / logger.html_path / "index.html"
if html:
html_path.parent.mkdir()
html_path.touch()

(tmp_dir / "logs" / "some_user_file.txt").touch()

assert (tmp_dir / "logs" / "m1.tsv").is_file()
assert (tmp_dir / "logs.json").is_file() == summary
assert (tmp_dir / "logs" / Scalar.subdir / "m1.tsv").is_file()
assert (tmp_dir / logger.summary_path).is_file() == summary
assert html_path.is_file() == html

dvclive.init("logs", summary=summary)

assert (tmp_dir / "logs" / "some_user_file.txt").is_file()
assert not (tmp_dir / "logs" / "m1.tsv").is_file()
assert (tmp_dir / "logs.json").is_file() == summary
assert not (tmp_dir / "logs" / Scalar.subdir / "m1.tsv").is_file()
assert (tmp_dir / logger.summary_path).is_file() == summary
assert not (html_path).is_file()


Expand Down