From e849162018b32865574b575ec4039bb3424e5fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= Date: Thu, 19 May 2022 14:45:10 +0200 Subject: [PATCH] exp: speed up repro execution with untracked directories in workspace When large untracked directories are present in the workspace, a lot of time is spent collecting untracked files in `scm.status` and `scm.is_dirty`. Pass `untracked_files=False` to `scm.is_dirty` and `untracked_files="no"` to `scm.status` at these call sites so that untracked files are not collected. --- dvc/repo/experiments/__init__.py | 4 ++-- dvc/repo/experiments/executor/base.py | 2 +- dvc/repo/plots/diff.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dvc/repo/experiments/__init__.py b/dvc/repo/experiments/__init__.py index 48c07adca6..5d6e9c93cd 100644 --- a/dvc/repo/experiments/__init__.py +++ b/dvc/repo/experiments/__init__.py @@ -196,7 +196,7 @@ def _stash_exp( branch_name = ExpRefInfo.from_ref(branch).name else: branch_name = f"{resume_rev[:7]}" - if self.scm.is_dirty(): + if self.scm.is_dirty(untracked_files=False): logger.info( "Modified checkpoint experiment based on " "'%s' will be created", @@ -398,7 +398,7 @@ def reproduce_one( self.reset_checkpoints() if not (queue or tmp_dir or machine): - staged, _, _ = self.scm.status() + staged, _, _ = self.scm.status(untracked_files="no") if staged: logger.warning( "Your workspace contains staged Git changes which will be " diff --git a/dvc/repo/experiments/executor/base.py b/dvc/repo/experiments/executor/base.py index a5185a74cf..f4d23a5411 100644 --- a/dvc/repo/experiments/executor/base.py +++ b/dvc/repo/experiments/executor/base.py @@ -640,7 +640,7 @@ def commit( ): """Commit stages as an experiment and return the commit SHA.""" rev = scm.get_rev() - if not scm.is_dirty(): + if not scm.is_dirty(untracked_files=False): logger.debug("No changes to commit") raise UnchangedExperimentError(rev) diff --git a/dvc/repo/plots/diff.py b/dvc/repo/plots/diff.py index 73dfabcfb7..7ffd18e862 100644 --- a/dvc/repo/plots/diff.py +++ b/dvc/repo/plots/diff.py @@ -5,7 +5,7 @@ def _revisions(repo, revs, experiment): if
baseline: revisions.append(baseline[:7]) if len(revisions) <= 1: - if len(revisions) == 0 and repo.scm.is_dirty(): + if len(revisions) == 0 and repo.scm.is_dirty(untracked_files=False): revisions.append("HEAD") revisions.append("workspace") return revisions