Skip to content

Commit

Permalink
dvc: remote: optimize imports
Browse files Browse the repository at this point in the history
Decreases `time dvc --version` from ~1.4 sec to ~0.3 sec.

Related #2495
Related #2445

Signed-off-by: Ruslan Kuprieiev <[email protected]>
  • Loading branch information
efiop committed Sep 23, 2019
1 parent 70efa6c commit fd14ca3
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 68 deletions.
13 changes: 6 additions & 7 deletions dvc/remote/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@

from dvc.scheme import Schemes

try:
from azure.storage.blob import BlockBlobService, BlobPermissions
from azure.common import AzureMissingResourceHttpError
except ImportError:
BlockBlobService = None

from dvc.utils.compat import urlparse
from dvc.progress import Tqdm
from dvc.config import Config
Expand All @@ -35,7 +29,7 @@ class RemoteAZURE(RemoteBASE):
r"(ContainerName=(?P<container_name>[^;]+);?)?"
r"(?P<connection_string>.+)?)?)$"
)
REQUIRES = {"azure-storage-blob": BlockBlobService}
REQUIRES = {"azure-storage-blob": "azure.storage.blob"}
PARAM_CHECKSUM = "etag"
COPY_POLL_SECONDS = 5

Expand Down Expand Up @@ -72,6 +66,9 @@ def __init__(self, repo, config):

@cached_property
def blob_service(self):
from azure.storage.blob import BlockBlobService
from azure.common import AzureMissingResourceHttpError

logger.debug("URL {}".format(self.path_info))
logger.debug("Connection string {}".format(self.connection_string))
blob_service = BlockBlobService(
Expand Down Expand Up @@ -139,6 +136,8 @@ def exists(self, path_info):
return any(path_info.path == path for path in paths)

def _generate_download_url(self, path_info, expires=3600):
from azure.storage.blob import BlobPermissions

expires_at = datetime.utcnow() + timedelta(seconds=expires)

sas_token = self.blob_service.generate_blob_shared_access_signature(
Expand Down
64 changes: 39 additions & 25 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,31 +86,8 @@ class RemoteBASE(object):

def __init__(self, repo, config):
self.repo = repo
deps_ok = all(self.REQUIRES.values())
if not deps_ok:
missing = [k for k, v in self.REQUIRES.items() if v is None]
url = config.get(
Config.SECTION_REMOTE_URL, "{}://".format(self.scheme)
)
msg = (
"URL '{}' is supported but requires these missing "
"dependencies: {}. If you have installed dvc using pip, "
"choose one of these options to proceed: \n"
"\n"
" 1) Install specific missing dependencies:\n"
" pip install {}\n"
" 2) Install dvc package that includes those missing "
"dependencies: \n"
" pip install 'dvc[{}]'\n"
" 3) Install dvc package with all possible "
"dependencies included: \n"
" pip install 'dvc[all]'\n"
"\n"
"If you have installed dvc from a binary package and you "
"are still seeing this message, please report it to us "
"using https://github.com/iterative/dvc/issues. Thank you!"
).format(url, missing, " ".join(missing), self.scheme)
raise RemoteMissingDepsError(msg)

self._check_requires(config)

core = config.get(Config.SECTION_CORE, {})
self.checksum_jobs = core.get(
Expand All @@ -130,6 +107,43 @@ def __init__(self, repo, config):
self.cache_types = copy(self.DEFAULT_CACHE_TYPES)
self.cache_type_confirmed = False

def _check_requires(self, config):
    """Raise if any module named in ``self.REQUIRES`` cannot be imported.

    ``self.REQUIRES`` is walked as ``{package: module}`` pairs: ``module``
    is the dotted name passed to ``importlib.import_module`` and
    ``package`` is the name reported to the user when that import fails.
    The imports are attempted here, at call time, rather than at module
    load.

    Args:
        config: mapping queried with ``.get()`` for the remote URL; when
            the key is absent, ``"<scheme>://"`` is shown instead.

    Raises:
        RemoteMissingDepsError: at least one module raised ``ImportError``.
    """
    from importlib import import_module

    def _unavailable(module_name):
        # Only ImportError counts as "missing"; anything else propagates.
        try:
            import_module(module_name)
        except ImportError:
            return True
        return False

    missing = [
        package
        for package, module_name in self.REQUIRES.items()
        if _unavailable(module_name)
    ]

    if missing:
        url = config.get(
            Config.SECTION_REMOTE_URL, "{}://".format(self.scheme)
        )
        template = (
            "URL '{}' is supported but requires these missing "
            "dependencies: {}. If you have installed dvc using pip, "
            "choose one of these options to proceed: \n"
            "\n"
            " 1) Install specific missing dependencies:\n"
            " pip install {}\n"
            " 2) Install dvc package that includes those missing "
            "dependencies: \n"
            " pip install 'dvc[{}]'\n"
            " 3) Install dvc package with all possible "
            "dependencies included: \n"
            " pip install 'dvc[all]'\n"
            "\n"
            "If you have installed dvc from a binary package and you "
            "are still seeing this message, please report it to us "
            "using https://github.com/iterative/dvc/issues. Thank you!"
        )
        # `missing` appears twice: once as a list for the summary, once
        # space-joined so it can be pasted after `pip install`.
        raise RemoteMissingDepsError(
            template.format(url, missing, " ".join(missing), self.scheme)
        )

def __repr__(self):
return "{class_name}: '{path_info}'".format(
class_name=type(self).__name__,
Expand Down
13 changes: 5 additions & 8 deletions dvc/remote/gs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,6 @@
from funcy import cached_property
from dvc.utils.compat import FileNotFoundError

try:
from google.cloud import storage
except ImportError:
storage = None

from dvc.remote.base import RemoteBASE
from dvc.config import Config
from dvc.exceptions import DvcException
Expand All @@ -22,7 +17,7 @@
class RemoteGS(RemoteBASE):
scheme = Schemes.GS
path_cls = CloudURLInfo
REQUIRES = {"google.cloud.storage": storage}
REQUIRES = {"google-cloud-storage": "google.cloud.storage"}
PARAM_CHECKSUM = "md5"

def __init__(self, repo, config):
Expand All @@ -37,10 +32,12 @@ def __init__(self, repo, config):

@cached_property
def gs(self):
from google.cloud.storage import Client

return (
storage.Client.from_service_account_json(self.credentialpath)
Client.from_service_account_json(self.credentialpath)
if self.credentialpath
else storage.Client(self.projectname)
else Client(self.projectname)
)

def get_file_checksum(self, path_info):
Expand Down
12 changes: 4 additions & 8 deletions dvc/remote/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@
from subprocess import Popen, PIPE
from contextlib import contextmanager, closing

try:
import pyarrow
except ImportError:
pyarrow = None

from dvc.config import Config
from dvc.scheme import Schemes

Expand All @@ -30,7 +25,7 @@ class RemoteHDFS(RemoteBASE):
scheme = Schemes.HDFS
REGEX = r"^hdfs://((?P<user>.*)@)?.*$"
PARAM_CHECKSUM = "checksum"
REQUIRES = {"pyarrow": pyarrow}
REQUIRES = {"pyarrow": "pyarrow"}

def __init__(self, repo, config):
super(RemoteHDFS, self).__init__(repo, config)
Expand All @@ -55,8 +50,9 @@ def __init__(self, repo, config):
path=parsed.path,
)

@staticmethod
def hdfs(path_info):
def hdfs(self, path_info):
import pyarrow

return get_connection(
pyarrow.hdfs.connect,
path_info.host,
Expand Down
11 changes: 5 additions & 6 deletions dvc/remote/oss.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@

from dvc.scheme import Schemes

try:
import oss2
except ImportError:
oss2 = None

from dvc.config import Config
from dvc.remote.base import RemoteBASE
from dvc.progress import Tqdm
Expand Down Expand Up @@ -41,7 +36,7 @@ class RemoteOSS(RemoteBASE):

scheme = Schemes.OSS
path_cls = CloudURLInfo
REQUIRES = {"oss2": oss2}
REQUIRES = {"oss2": "oss2"}
PARAM_CHECKSUM = "etag"
COPY_POLL_SECONDS = 5

Expand Down Expand Up @@ -71,6 +66,8 @@ def __init__(self, repo, config):

@property
def oss_service(self):
import oss2

if self._bucket is None:
logger.debug("URL {}".format(self.path_info))
logger.debug("key id {}".format(self.key_id))
Expand Down Expand Up @@ -98,6 +95,8 @@ def remove(self, path_info):
self.oss_service.delete_object(path_info.path)

def _list_paths(self, prefix):
import oss2

for blob in oss2.ObjectIterator(self.oss_service, prefix=prefix):
yield blob.key

Expand Down
9 changes: 3 additions & 6 deletions dvc/remote/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,6 @@
import logging
from funcy import cached_property

try:
import boto3
except ImportError:
boto3 = None

from dvc.progress import Tqdm
from dvc.config import Config
from dvc.remote.base import RemoteBASE
Expand All @@ -22,7 +17,7 @@
class RemoteS3(RemoteBASE):
scheme = Schemes.S3
path_cls = CloudURLInfo
REQUIRES = {"boto3": boto3}
REQUIRES = {"boto3": "boto3"}
PARAM_CHECKSUM = "etag"

def __init__(self, repo, config):
Expand Down Expand Up @@ -61,6 +56,8 @@ def __init__(self, repo, config):

@cached_property
def s3(self):
import boto3

session = boto3.session.Session(
profile_name=self.profile, region_name=self.region
)
Expand Down
9 changes: 3 additions & 6 deletions dvc/remote/ssh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,6 @@
from concurrent.futures import ThreadPoolExecutor
from contextlib import contextmanager, closing

try:
import paramiko
except ImportError:
paramiko = None

import dvc.prompt as prompt
from dvc.config import Config
from dvc.utils import to_chunks
Expand All @@ -33,7 +28,7 @@

class RemoteSSH(RemoteBASE):
scheme = Schemes.SSH
REQUIRES = {"paramiko": paramiko}
REQUIRES = {"paramiko": "paramiko"}

JOBS = 4
PARAM_CHECKSUM = "md5"
Expand Down Expand Up @@ -93,6 +88,8 @@ def ssh_config_filename():

@staticmethod
def _load_user_ssh_config(hostname):
import paramiko

user_config_file = RemoteSSH.ssh_config_filename()
user_ssh_config = {}
if hostname and os.path.exists(user_config_file):
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/remote/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ class TestRemoteBASE(object):

class TestMissingDeps(TestCase, TestRemoteBASE):
def test(self):
REQUIRES = {"foo": None, "bar": None, "mock": mock}
with mock.patch.object(self.REMOTE_CLS, "REQUIRES", REQUIRES):
requires = {"missing": "missing"}
with mock.patch.object(self.REMOTE_CLS, "REQUIRES", requires):
with self.assertRaises(RemoteMissingDepsError):
self.REMOTE_CLS(None, {})

Expand Down

0 comments on commit fd14ca3

Please sign in to comment.