From a036eaf8171eab177e5a2f579835fd223e317609 Mon Sep 17 00:00:00 2001 From: David de la Iglesia Castro Date: Mon, 28 Nov 2022 15:24:06 +0100 Subject: [PATCH] exp: save: Add `include_untracked`. Allow including a list of potentially untracked files. Covers the DVCLive use case where in the first run the generated files won't be tracked in Git yet. --- dvc/commands/experiments/save.py | 12 ++++++++- dvc/repo/experiments/executor/local.py | 9 ++++--- dvc/repo/experiments/save.py | 6 ++++- tests/func/experiments/test_save.py | 37 ++++++++++++-------------- tests/unit/command/test_experiments.py | 4 ++- 5 files changed, 42 insertions(+), 26 deletions(-) diff --git a/dvc/commands/experiments/save.py b/dvc/commands/experiments/save.py index 2c7f3763fe..20a5ec29da 100644 --- a/dvc/commands/experiments/save.py +++ b/dvc/commands/experiments/save.py @@ -14,7 +14,9 @@ def run(self): try: ref = self.repo.experiments.save( - name=self.args.name, force=self.args.force + name=self.args.name, + force=self.args.force, + include_untracked=self.args.include_untracked, ) except DvcException: logger.exception("failed to save experiment") @@ -66,4 +68,12 @@ def add_parser(experiments_subparsers, parent_parser): ), metavar="", ) + save_parser.add_argument( + "-I", + "--include-untracked", + action="append", + default=[], + help="List of untracked paths to include in the experiment.", + metavar="", + ) save_parser.set_defaults(func=CmdExperimentsSave) diff --git a/dvc/repo/experiments/executor/local.py b/dvc/repo/experiments/executor/local.py index 2aaa43cc6a..cbf725c4d2 100644 --- a/dvc/repo/experiments/executor/local.py +++ b/dvc/repo/experiments/executor/local.py @@ -2,14 +2,14 @@ import os from contextlib import ExitStack from tempfile import mkdtemp -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, List, Optional from funcy import cached_property, retry from scmrepo.exceptions import SCMError as _SCMError from shortuuid import uuid -from dvc.lock import 
LockError from dvc.exceptions import DvcException +from dvc.lock import LockError from dvc.scm import SCM, GitMergeError from dvc.utils.fs import makedirs, remove @@ -25,7 +25,7 @@ ExpRefInfo, ) from ..utils import EXEC_TMP_DIR, get_exp_rwlock -from .base import BaseExecutor, TaskStatus, ExecutorResult +from .base import BaseExecutor, ExecutorResult, TaskStatus if TYPE_CHECKING: from scmrepo.git import Git @@ -255,6 +255,7 @@ def save( cls, info: "ExecutorInfo", force: bool = False, + include_untracked: Optional[List[str]] = None, ) -> ExecutorResult: from dvc.repo import Repo @@ -271,6 +272,8 @@ def save( try: stages = dvc.commit([], force=force) exp_hash = cls.hash_exp(stages) + if include_untracked: + dvc.scm.add(include_untracked) cls.commit( dvc.scm, exp_hash, diff --git a/dvc/repo/experiments/save.py b/dvc/repo/experiments/save.py index 692a4b6c7c..260521aa8e 100644 --- a/dvc/repo/experiments/save.py +++ b/dvc/repo/experiments/save.py @@ -15,6 +15,7 @@ def save( repo: "Repo", name: Optional[str] = None, force: bool = False, + include_untracked: Optional[List[str]] = None, ) -> Optional[str]: """Save the current workspace status as an experiment. 
@@ -33,7 +34,10 @@ def save( entry = repo.experiments.new(queue=queue, name=name, force=force) executor = queue.init_executor(repo.experiments, entry) - save_result = executor.save(executor.info, force=force) + + save_result = executor.save( + executor.info, force=force, include_untracked=include_untracked + ) result = queue.collect_executor(repo.experiments, executor, save_result) exp_rev = first(result) diff --git a/tests/func/experiments/test_save.py b/tests/func/experiments/test_save.py index 2427a00dda..7ad61aa2e4 100644 --- a/tests/func/experiments/test_save.py +++ b/tests/func/experiments/test_save.py @@ -49,33 +49,16 @@ def test_exp_save_overwrite_experiment(tmp_dir, dvc, scm, exp_stage): dvc.experiments.save(name="dummy", force=True) -def test_exp_save_multiple(tmp_dir, dvc, scm): - baseline = scm.get_rev() - for i in range(2): - name = f"exp-{i}" - tmp_dir.gen({name: f"{name} content"}) - dvc.experiments.save(name=name) - - assert dvc.experiments.ls()[baseline] == ["exp-0", "exp-1"] - - for i in range(2): - scm.reset(hard=True) - name = f"exp-{i}" - dvc.experiments.apply(name) - assert (tmp_dir / name).read_text() == f"{name} content" - - def test_exp_save_after_commit(tmp_dir, dvc, scm, exp_stage): baseline = scm.get_rev() dvc.experiments.save(name="exp-1") tmp_dir.scm_gen({"new_file": "new_file"}, commit="new baseline") - new_baseline = scm.get_rev() dvc.experiments.save(name="exp-2") all_exps = dvc.experiments.ls(all_commits=True) - assert all_exps[baseline] == ["exp-1"] - assert all_exps[new_baseline] == ["exp-2"] + assert all_exps[baseline[:7]] == ["exp-1"] + assert all_exps["master"] == ["exp-2"] def test_exp_save_with_staged_changes(tmp_dir, dvc, scm): @@ -85,4 +68,18 @@ def test_exp_save_with_staged_changes(tmp_dir, dvc, scm): dvc.experiments.save(name="exp") _, _, unstaged = scm.status() - assert "new_file" in unstaged \ No newline at end of file + assert "new_file" in unstaged + + +def test_exp_save_include_untracked(tmp_dir, dvc, scm, 
exp_stage): + new_file = tmp_dir / "new_file" + for i in range(2): + new_file.write_text(f"exp-{i}") + dvc.experiments.save(name=f"exp-{i}", include_untracked=["new_file"]) + + _, _, unstaged = scm.status() + assert "new_file" in unstaged + assert new_file.read_text() == f"exp-{i}" + + dvc.experiments.apply("exp-0") + assert new_file.read_text() == "exp-0" diff --git a/tests/unit/command/test_experiments.py b/tests/unit/command/test_experiments.py index e906411f12..36ff5c352e 100644 --- a/tests/unit/command/test_experiments.py +++ b/tests/unit/command/test_experiments.py @@ -946,4 +946,6 @@ def test_experiments_save(dvc, scm, mocker): assert cmd.run() == 0 - m.assert_called_once_with(cmd.repo, name="exp-name", force=True) + m.assert_called_once_with( + cmd.repo, name="exp-name", force=True, include_untracked=[] + )