Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

get: use StateBase #2323

Merged
merged 1 commit into from
Jul 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,29 @@ def __init__(self, url, rev, cause):
)


def _clone(url=None, rev=None, rev_lock=None, cache_dir=None):
import git
def _clone(cache_dir=None, **kwargs):
from dvc.repo import Repo

_path = tempfile.mkdtemp("dvc-repo")

_clone_git_repo(_path, **kwargs)

if cache_dir:
repo = Repo(_path)
cache_config = CacheConfig(repo.config)
cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
repo.scm.git.close()

return Repo(_path)


def _clone_git_repo(to_path, url=None, rev=None, rev_lock=None):
import git

try:
repo = git.Repo.clone_from(url, _path, no_single_branch=True)
repo = git.Repo.clone_from(url, to_path, no_single_branch=True)
except git.exc.GitCommandError as exc:
raise CloneError(url, _path, exc)

raise CloneError(url, to_path, exc)
try:
revision = rev_lock or rev
if revision:
Expand All @@ -56,14 +68,6 @@ def _clone(url=None, rev=None, rev_lock=None, cache_dir=None):
finally:
repo.close()

if cache_dir:
repo = Repo(_path)
cache_config = CacheConfig(repo.config)
cache_config.set_dir(cache_dir, level=Config.LEVEL_LOCAL)
repo.scm.git.close()

return Repo(_path)


def _remove(repo):
repo.scm.git.close()
Expand Down
5 changes: 3 additions & 2 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from dvc.progress import progress, ProgressCallback
from dvc.utils import LARGE_DIR_SIZE, tmp_fname, to_chunks, move, relpath
from dvc.state import StateBase
from dvc.state import StateNoop
from dvc.path_info import PathInfo, URLInfo


Expand Down Expand Up @@ -78,6 +78,8 @@ class RemoteBASE(object):
CHECKSUM_DIR_SUFFIX = ".dir"
CHECKSUM_JOBS = max(1, min(4, cpu_count() // 2))

state = StateNoop()

def __init__(self, repo, config):
self.repo = repo
deps_ok = all(self.REQUIRES.values())
Expand Down Expand Up @@ -113,7 +115,6 @@ def __init__(self, repo, config):

self.protected = False
self.no_traverse = config.get(Config.SECTION_REMOTE_NO_TRAVERSE)
self.state = StateBase()
self._dir_info = {}

def __repr__(self):
Expand Down
5 changes: 4 additions & 1 deletion dvc/remote/local/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ class RemoteLOCAL(RemoteBASE):

def __init__(self, repo, config):
super(RemoteLOCAL, self).__init__(repo, config)
self.state = self.repo.state if self.repo else None
self.protected = config.get(Config.SECTION_CACHE_PROTECTED, False)

types = config.get(Config.SECTION_CACHE_TYPE, None)
Expand All @@ -83,6 +82,10 @@ def __init__(self, repo, config):
self.path_info = PathInfo(cache_dir) if cache_dir else None
self._dir_info = {}

@property
def state(self):
return self.repo.state

@property
def cache_dir(self):
return self.path_info.fspath if self.path_info else None
Expand Down
3 changes: 1 addition & 2 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,11 @@ def _ignore(self):
updater = Updater(self.dvc_dir)

flist = [
self.state.state_file,
self.lock.lock_file,
self.config.config_local_file,
updater.updater_file,
updater.lock.lock_file,
] + self.state.temp_files
] + self.state.files

if self.cache.local.cache_dir.startswith(self.root_dir):
flist += [self.cache.local.cache_dir]
Expand Down
7 changes: 7 additions & 0 deletions dvc/repo/get.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from dvc.config import Config
from dvc.path_info import PathInfo
from dvc.external_repo import external_repo
from dvc.state import StateNoop
from dvc.utils import remove
from dvc.utils.compat import urlparse

Expand All @@ -21,6 +22,11 @@ def get(url, path, out=None, rev=None):
tmp_dir = os.path.join(dpath, "." + str(shortuuid.uuid()))
try:
with external_repo(cache_dir=tmp_dir, url=url, rev=rev) as repo:
# Note: we need to replace state, because in case of getting DVC
# dependency on CIFS or NFS filesystems, sqlite-based state
# will be unable to obtain lock
repo.state = StateNoop()

# Try any links possible to avoid data duplication.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only change should be here:

repo.state = ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed, state db is not connected to until __enter__, so we could do that.

#
# Not using symlink, because we need to remove cache after we are
Expand All @@ -35,6 +41,7 @@ def get(url, path, out=None, rev=None):
Config.SECTION_CACHE_TYPE,
"reflink,hardlink,copy",
)

o = repo.find_out_by_relpath(path)
with repo.state:
repo.cloud.pull(o.get_used_cache())
Expand Down
16 changes: 14 additions & 2 deletions dvc/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def __init__(self, dvc_version, expected, actual):
)


class StateBase(object):
class StateNoop(object):
files = []

def save(self, path_info, checksum):
pass

Expand All @@ -41,8 +43,14 @@ def get(self, path_info):
def save_link(self, path_info):
pass

def __enter__(self):
pass

class State(StateBase): # pylint: disable=too-many-instance-attributes
def __exit__(self, exc_type, exc_val, exc_tb):
pass

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We won't need these either if we only change remote state.


class State(object): # pylint: disable=too-many-instance-attributes
"""Class for the state database.

Args:
Expand Down Expand Up @@ -112,6 +120,10 @@ def __init__(self, repo, config):
self.cursor = None
self.inserts = 0

@property
def files(self):
return self.temp_files + [self.state_file]

def __enter__(self):
self.load()

Expand Down