Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Push running checkpoint to remote #6332

Merged
merged 24 commits into from
Aug 12, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
14175c8
Push running checkpoint to remote
karajan1001 Jul 19, 2021
770e5bc
Update dvc/repo/experiments/executor/base.py
karajan1001 Jul 20, 2021
15036f3
Get the branch name
karajan1001 Jul 21, 2021
205ce57
Finish this PR
karajan1001 Jul 26, 2021
b6f147a
Update after finally commit
karajan1001 Jul 27, 2021
65eb8d6
Some problems found in review
karajan1001 Jul 27, 2021
5077862
Update dvc/repo/experiments/executor/base.py
karajan1001 Jul 27, 2021
dd7b380
add remote check before auto push
karajan1001 Aug 2, 2021
92d9b07
Split current one env into two
karajan1001 Aug 4, 2021
7c271b3
Rename DVC_EXP_CHECKPOINT_PUSH to DVC_EXP_AUTO_PUSH
karajan1001 Aug 5, 2021
34f4da3
value error
karajan1001 Aug 5, 2021
37d2150
Remove getenv_bool switch to env2bool
karajan1001 Aug 6, 2021
0cfd8d1
Better on handling dulwich exceptions
karajan1001 Aug 6, 2021
ce7cd7d
Name changing removing prints
karajan1001 Aug 6, 2021
e666d2e
Update dvc/env.py
karajan1001 Aug 9, 2021
50d419f
Some changes in review
karajan1001 Aug 9, 2021
5cf541c
Add validation to repo url
karajan1001 Aug 10, 2021
dfcc946
Add new type of SCMError
karajan1001 Aug 10, 2021
060ec01
Update dvc/repo/experiments/executor/base.py
karajan1001 Aug 10, 2021
ffb657a
Use spy tests call counts and args
karajan1001 Aug 10, 2021
41aeaaf
Update dvc/scm/git/backend/base.py
karajan1001 Aug 11, 2021
488c7f3
Update dvc/scm/git/backend/dulwich.py
karajan1001 Aug 11, 2021
4a16226
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Aug 11, 2021
32677a2
Remove validate_git_repo to validate_git_remote
karajan1001 Aug 11, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dvc/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
DVCLIVE_HTML = "DVCLIVE_HTML"
DVCLIVE_RESUME = "DVCLIVE_RESUME"
DVC_IGNORE_ISATTY = "DVC_IGNORE_ISATTY"
DVC_EXP_AUTO_PUSH = "DVC_EXP_AUTO_PUSH"
31 changes: 30 additions & 1 deletion dvc/repo/experiments/executor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@

from funcy import cached_property

from dvc.env import DVC_EXP_AUTO_PUSH
from dvc.exceptions import DvcException
from dvc.path_info import PathInfo
from dvc.repo import Repo
from dvc.repo.experiments.base import (
EXEC_BASELINE,
EXEC_BRANCH,
Expand Down Expand Up @@ -331,6 +333,7 @@ def filter_pipeline(stages):

checkpoint_func = partial(
cls.checkpoint_callback,
dvc,
dvc.scm,
name,
repro_force or checkpoint_reset,
Expand Down Expand Up @@ -393,7 +396,6 @@ def _repro_dvc(
git_url: Optional[str] = None,
**kwargs,
):
from dvc.repo import Repo
from dvc.utils.serialize import modify_yaml

dvc = Repo(dvc_dir)
Expand Down Expand Up @@ -450,9 +452,32 @@ def _repro_args(cls, dvc):
kwargs = {}
return args, kwargs

@staticmethod
def _auto_push(git_remote: str, dvc: "Repo", scm: "Git"):
from dvc.repo.experiments.push import push

if git_remote == scm.root_dir:
logger.warning(
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved
f"{DVC_EXP_AUTO_PUSH} {git_remote} is the running "
"repository auto push will not work"
)
return
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved

branch = scm.get_ref(EXEC_BRANCH, follow=False)
branch_name = ExpRefInfo.from_ref(branch).name
push(
dvc,
git_remote,
branch_name,
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved
push_cache=True,
run_cache=True,
)
logger.info({"pushed": branch_name})
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
def checkpoint_callback(
cls,
dvc: "Repo",
scm: "Git",
name: Optional[str],
force: bool,
Expand All @@ -464,6 +489,10 @@ def checkpoint_callback(
exp_rev = cls.commit(
scm, exp_hash, exp_name=name, force=force, checkpoint=True
)

git_remote = os.environ.get(DVC_EXP_AUTO_PUSH, None)
if git_remote:
cls._auto_push(git_remote, dvc, scm)
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved
logger.info("Checkpoint experiment iteration '%s'.", exp_rev[:7])
except UnchangedExperimentError:
pass
Expand Down
18 changes: 18 additions & 0 deletions tests/func/experiments/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,21 @@ def checkpoint_stage(tmp_dir, scm, dvc, mocker):
scm.commit("init")
stage.iterations = DEFAULT_ITERATIONS
return stage


@pytest.fixture
def git_upstream(tmp_dir, erepo_dir):
url = "file://{}".format(erepo_dir.resolve().as_posix())
tmp_dir.scm.gitpython.repo.create_remote("upstream", url)
erepo_dir.remote = "upstream"
erepo_dir.url = url
return erepo_dir


@pytest.fixture
def git_downstream(tmp_dir, erepo_dir):
url = "file://{}".format(tmp_dir.resolve().as_posix())
erepo_dir.scm.gitpython.repo.create_remote("upstream", url)
erepo_dir.remote = "upstream"
erepo_dir.url = url
return erepo_dir
74 changes: 74 additions & 0 deletions tests/func/experiments/test_checkpoints.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
import logging
import os

import pytest
from funcy import first

from dvc.exceptions import DvcException
from dvc.repo.experiments import MultipleBranchError
from dvc.repo.experiments.base import EXEC_APPLY, EXEC_CHECKPOINT
from dvc.repo.experiments.utils import exp_refs_by_rev


@pytest.mark.parametrize("workspace", [True, False])
Expand Down Expand Up @@ -188,3 +192,73 @@ def test_resume_non_head_checkpoint(
)
new_head = first(results)
assert orig_branch != dvc.experiments.get_branch_by_rev(new_head)


@pytest.mark.parametrize("use_url", [True, False])
def test_auto_push_during_iterations(
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved
tmp_dir, scm, dvc, checkpoint_stage, git_upstream, local_remote, use_url
):
# set up remote repo
remote = git_upstream.url if use_url else git_upstream.remote
git_upstream.scm.fetch_refspecs(str(tmp_dir), ["master:master"])

# without auto push
results = dvc.experiments.run(checkpoint_stage.addressing)
exp = first(results)
ref_info = first(exp_refs_by_rev(scm, exp))
assert git_upstream.scm.get_ref(str(ref_info)) is None

# add auto push
os.environ["DVC_EXP_AUTO_PUSH"] = remote
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved
results = dvc.experiments.run(checkpoint_stage.addressing)
assert (tmp_dir / "foo").read_text() == "4"
casperdcl marked this conversation as resolved.
Show resolved Hide resolved
exp = first(results)
ref_info = first(exp_refs_by_rev(scm, exp))
assert git_upstream.scm.get_ref(str(ref_info)) == exp

# check the data
with git_upstream.dvc.config.edit() as conf:
conf["remote"]["local"] = local_remote.config
conf["core"]["remote"] = "local"

git_upstream.dvc.experiments.apply(ref_info.name)
git_upstream.dvc.experiments.apply(exp)
git_upstream.dvc.pull()
assert (git_upstream / "foo").read_text() == "4"

# resume the remote checkpoint
os.environ.pop("DVC_EXP_AUTO_PUSH")
with git_upstream.chdir():
git_upstream.dvc.experiments.run(checkpoint_stage.addressing)
assert (git_upstream / "foo").read_text() == "6"


def test_auto_push_error_url(dvc, scm, checkpoint_stage, local_remote):
os.environ["DVC_EXP_AUTO_PUSH"] = "none"
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved
assert (
dvc.experiments.run(checkpoint_stage.addressing, params=["foo=2"])
== {}
)


def test_auto_push_no_remote(dvc, scm, checkpoint_stage, git_upstream):
os.environ["DVC_EXP_AUTO_PUSH"] = git_upstream.url
assert (
dvc.experiments.run(checkpoint_stage.addressing, params=["foo=2"])
== {}
)
karajan1001 marked this conversation as resolved.
Show resolved Hide resolved


def test_auto_push_self_remote(tmp_dir, dvc, scm, checkpoint_stage, caplog):
root_dir = str(tmp_dir)
os.environ["DVC_EXP_AUTO_PUSH"] = root_dir
assert (
dvc.experiments.run(checkpoint_stage.addressing, params=["foo=2"])
!= {}
)

with caplog.at_level(logging.WARNING, logger="dvc"):
assert (
f"DVC_EXP_AUTO_PUSH {root_dir} is the running "
"repository auto push will not work" in caplog.messages
)
18 changes: 0 additions & 18 deletions tests/func/experiments/test_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,6 @@
from dvc.repo.experiments.utils import exp_refs_by_rev


@pytest.fixture
def git_upstream(tmp_dir, erepo_dir):
url = f"file://{erepo_dir.resolve().as_posix()}"
tmp_dir.scm.gitpython.repo.create_remote("upstream", url)
erepo_dir.remote = "upstream"
erepo_dir.url = url
return erepo_dir


@pytest.fixture
def git_downstream(tmp_dir, erepo_dir):
url = f"file://{tmp_dir.resolve().as_posix()}"
erepo_dir.scm.gitpython.repo.create_remote("upstream", url)
erepo_dir.remote = "upstream"
erepo_dir.url = url
return erepo_dir


@pytest.mark.parametrize("use_url", [True, False])
def test_push(tmp_dir, scm, dvc, git_upstream, exp_stage, use_url):
from dvc.exceptions import InvalidArgumentError
Expand Down