Skip to content

Commit

Permalink
live: add tests for cache use case
Browse files Browse the repository at this point in the history
  • Loading branch information
pared committed Feb 3, 2021
1 parent eb4681e commit a3ea11b
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 138 deletions.
11 changes: 5 additions & 6 deletions dvc/repo/reproduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,12 @@ def _track_stage(stage):
for out in stage.outs:
if not out.use_scm_ignore and out.is_in_repo:
stage.repo.scm.track_file(relpath(out.path_info))
if out.live:
from dvc.repo.live import summary_path_info
if out.live:
from dvc.repo.live import summary_path_info

summary = summary_path_info(out)
# TODO mark tracking live
if summary:
stage.repo.scm.track_file(relpath(summary))
summary = summary_path_info(out)
if summary:
stage.repo.scm.track_file(relpath(summary))
stage.repo.scm.track_changed_files()


Expand Down
2 changes: 1 addition & 1 deletion dvc/stage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def _load_live_output(
outs += loads_from(
stage,
[path],
use_cache=bool(live_no_cache),
use_cache=not bool(live_no_cache),
live={
BaseOutput.PARAM_LIVE_SUMMARY: live_summary,
BaseOutput.PARAM_LIVE_HTML: live_html,
Expand Down
198 changes: 67 additions & 131 deletions tests/func/test_live.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,46 @@
dvclive.next_step()"""
)

# Source of the ``train.py`` script used by the live-checkpoint tests.
# The script resumes from the integer stored in the ``checkpoint`` file
# (falling back to 0 when the file is missing or does not hold an int),
# then, for each step in ``range(1, r)``, persists the running value and
# logs it to dvclive as ``metric1`` (and twice that value as ``metric2``).
#
# The body must keep valid Python indentation: ``dedent`` only strips the
# common leading whitespace, it cannot rebuild block structure.
LIVE_CHECKPOINT_SCRIPT = dedent(
    """
    import os
    import dvclive
    def read(path):
        value=0
        if os.path.exists(path):
            with open(path, 'r') as fobj:
                try:
                    value = int(fobj.read())
                except ValueError:
                    pass
        return value
    def dump(value, path):
        with open(path, "w") as fobj:
            fobj.write(str(value))
    r = 3
    checkpoint_file = "checkpoint"
    value = read(checkpoint_file)
    for i in range(1,r):
        m = i + value
        dump(m, checkpoint_file)
        dvclive.log("metric1", m)
        dvclive.log("metric2", m * 2)
        dvclive.next_step()"""
)


@pytest.fixture
def live_stage(tmp_dir, scm, dvc):

# pytest.skip("dvclive does not exist yet")
pytest.skip("dvclive does not exist yet")

def make(summary=True, html=True, live=None, live_no_cache=None):
assert not (live and live_no_cache)
assert bool(live) != bool(live_no_cache)
tmp_dir.gen("train.py", LIVE_SCRITP)
tmp_dir.gen("params.yaml", "foo: 1")
stage = dvc.run(
Expand Down Expand Up @@ -88,9 +120,8 @@ def test_export_config(tmp_dir, dvc, mocker, summary, live_stage):
assert kwargs["env"]["DVCLIVE_SUMMARY"] == str(int(summary))


@pytest.mark.parametrize("typ", ("live", "live_no_cache"))
def test_live_provides_metrics(tmp_dir, dvc, live_stage, typ):
live_stage(summary=True, **{typ: "logs"})
def test_live_provides_metrics(tmp_dir, dvc, live_stage):
live_stage(summary=True, live="logs")

assert (tmp_dir / "logs.json").is_file()
assert dvc.metrics.show() == {
Expand All @@ -103,9 +134,8 @@ def test_live_provides_metrics(tmp_dir, dvc, live_stage, typ):
assert "logs/loss.tsv" in plots


@pytest.mark.parametrize("typ", ("live", "live_no_cache"))
def test_live_provides_no_metrics(tmp_dir, dvc, live_stage, typ):
live_stage(summary=False, **{typ: "logs"})
def test_live_provides_no_metrics(tmp_dir, dvc, live_stage):
live_stage(summary=False, live="logs")

assert not (tmp_dir / "logs.json").is_file()
with pytest.raises(MetricsError):
Expand All @@ -117,8 +147,9 @@ def test_live_provides_no_metrics(tmp_dir, dvc, live_stage, typ):
assert "logs/loss.tsv" in plots


def test_experiments_track_summary(tmp_dir, scm, dvc, live_stage):
live_stage(summary=True, live="logs")
@pytest.mark.parametrize("typ", ("live", "live_no_cache"))
def test_experiments_track_summary(tmp_dir, scm, dvc, live_stage, typ):
live_stage(summary=True, **{typ: "logs"})
baseline_rev = scm.get_rev()

experiments = dvc.experiments.run(targets=["live_stage"], params=["foo=2"])
Expand All @@ -139,55 +170,29 @@ def test_live_html(tmp_dir, dvc, live_stage, html):
@pytest.fixture
def live_checkpoint_stage(tmp_dir, scm, dvc):

# pytest.skip("dvclive does not exist yet")

SCRIPT = dedent(
"""
import os
import dvclive
def read(path):
value=0
if os.path.exists(path):
with open(path, 'r') as fobj:
try:
value = int(fobj.read())
except ValueError:
pass
return value
def dump(value, path):
with open(path, "w") as fobj:
fobj.write(str(value))
pytest.skip("dvclive does not exist yet")

r = 3
checkpoint_file = "checkpoint"
def make(live=None, live_no_cache=None):
assert bool(live) != bool(live_no_cache)

value = read(checkpoint_file)
for i in range(1,r):
m = i + value
dump(m, checkpoint_file)
dvclive.log("metric1", m)
dvclive.log("metric2", m * 2)
dvclive.next_step()"""
)
tmp_dir.gen("train.py", LIVE_CHECKPOINT_SCRIPT)
tmp_dir.gen("params.yaml", "foo: 1")
stage = dvc.run(
cmd="python train.py",
params=["foo"],
deps=["train.py"],
name="live_stage",
live=live,
live_no_cache=live_no_cache,
checkpoints=["checkpoint"],
no_exec=True,
)

tmp_dir.gen("train.py", SCRIPT)
tmp_dir.gen("params.yaml", "foo: 1")
stage = dvc.run(
cmd="python train.py",
params=["foo"],
deps=["train.py"],
name="live_stage",
live="logs",
checkpoints=["checkpoint"],
no_exec=True,
)
scm.add(["dvc.yaml", "train.py", "params.yaml", ".gitignore"])
scm.commit("initial: live_stage")
return stage

scm.add(["dvc.yaml", "train.py", "params.yaml", ".gitignore"])
scm.commit("initial: live_stage")
yield stage
yield make


def checkpoints_metric(show_results, metric_file, metric_name):
Expand All @@ -203,17 +208,19 @@ def checkpoints_metric(show_results, metric_file, metric_name):
)


def test_live_checkpoints_resume(tmp_dir, scm, dvc, live_checkpoint_stage):
@pytest.mark.parametrize("typ", ("live", "live_no_cache"))
def test_live_checkpoints_resume(
tmp_dir, scm, dvc, live_checkpoint_stage, typ
):
stage = live_checkpoint_stage(**{typ: "logs"})
results = dvc.experiments.run(
live_checkpoint_stage.addressing, params=["foo=2"], tmp_dir=False
stage.addressing, params=["foo=2"], tmp_dir=False
)

checkpoint_resume = first(results)

dvc.experiments.run(
live_checkpoint_stage.addressing,
checkpoint_resume=checkpoint_resume,
tmp_dir=False,
stage.addressing, checkpoint_resume=checkpoint_resume, tmp_dir=False,
)

results = dvc.experiments.show()
Expand All @@ -235,74 +242,3 @@ def test_live_checkpoints_resume(tmp_dir, scm, dvc, live_checkpoint_stage):
4,
2,
]


# Template for the ``checkpoint.py`` script used by the checkpoint tests.
# The four ``{}`` placeholders are, in order: the checkpoint file path, the
# number of checkpoint iterations, and the source and destination passed to
# ``shutil.copyfile``.
#
# The generated script reads the current integer from the checkpoint file
# (0 when the file is new or unparsable), copies source to destination, and
# -- only when the DVC_CHECKPOINT environment variable is set -- increments
# and rewrites the value once per iteration, calling ``make_checkpoint()``
# after each write.
#
# The body must keep valid Python indentation: ``dedent`` only strips the
# common leading whitespace, it cannot rebuild block structure.
CHECKPOINT_SCRIPT_FORMAT = dedent(
    """\
    import os
    import sys
    import shutil
    from time import sleep
    from dvc.api import make_checkpoint
    checkpoint_file = {}
    checkpoint_iterations = int({})
    if os.path.exists(checkpoint_file):
        with open(checkpoint_file) as fobj:
            try:
                value = int(fobj.read())
            except ValueError:
                value = 0
    else:
        with open(checkpoint_file, "w"):
            pass
        value = 0
    shutil.copyfile({}, {})
    if os.getenv("DVC_CHECKPOINT"):
        for _ in range(checkpoint_iterations):
            value += 1
            with open(checkpoint_file, "w") as fobj:
                fobj.write(str(value))
            make_checkpoint()
"""
)
# Concrete script: every placeholder is filled from the command line, so the
# script is invoked as
# ``checkpoint.py <checkpoint_file> <iterations> <copy_src> <copy_dst>``.
CHECKPOINT_SCRIPT = CHECKPOINT_SCRIPT_FORMAT.format(
    "sys.argv[1]", "sys.argv[2]", "sys.argv[3]", "sys.argv[4]"
)


@pytest.fixture
def checkpoint_stage(tmp_dir, scm, dvc):
    """Create and commit an unexecuted ``checkpoint-file`` stage.

    Writes ``checkpoint.py`` (from ``CHECKPOINT_SCRIPT``) and a
    ``params.yaml`` containing ``foo: 1`` into ``tmp_dir``, registers the
    stage through ``dvc.run`` with ``no_exec=True``, adds the generated
    files to ``scm`` and commits them.

    Returns:
        The stage object returned by ``dvc.run``.
    """
    tmp_dir.gen("checkpoint.py", CHECKPOINT_SCRIPT)
    tmp_dir.gen("params.yaml", "foo: 1")

    # Script arguments: checkpoint file, iteration count, copy source,
    # copy destination.
    stage_options = {
        "cmd": "python checkpoint.py foo 5 params.yaml metrics.yaml",
        "metrics_no_cache": ["metrics.yaml"],
        "params": ["foo"],
        "checkpoints": ["foo"],
        "deps": ["checkpoint.py"],
        "no_exec": True,
        "name": "checkpoint-file",
    }
    created_stage = dvc.run(**stage_options)

    tracked_files = ["dvc.yaml", "checkpoint.py", "params.yaml", ".gitignore"]
    scm.add(tracked_files)
    scm.commit("init")
    return created_stage


def test_metrics_behaviour(tmp_dir, scm, dvc, checkpoint_stage):
    """Run a checkpoint experiment, resume it, then list the experiments.

    The experiment is first run with ``foo=2``; the first entry of that
    run's results is then passed as ``checkpoint_resume`` to a second run.
    Both runs use ``tmp_dir=False``. The final ``show()`` result is
    collected but nothing is asserted on it.
    """
    initial_results = dvc.experiments.run(
        checkpoint_stage.addressing,
        params=["foo=2"],
        tmp_dir=False,
    )
    checkpoint_resume = first(initial_results)

    # Continue from the checkpoint produced by the first run.
    dvc.experiments.run(
        checkpoint_stage.addressing,
        checkpoint_resume=checkpoint_resume,
        tmp_dir=False,
    )

    results = dvc.experiments.show()

0 comments on commit a3ea11b

Please sign in to comment.