From fd14ca32693d7b9b6e9ef05a23d714094148dac3 Mon Sep 17 00:00:00 2001 From: Ruslan Kuprieiev Date: Mon, 23 Sep 2019 07:03:08 +0300 Subject: [PATCH] dvc: remote: optimize imports Decreases `time dvc --version` from ~1.4 sec to ~0.3 sec. Related #2495 Related #2445 Signed-off-by: Ruslan Kuprieiev --- dvc/remote/azure.py | 13 ++++--- dvc/remote/base.py | 64 +++++++++++++++++++++------------- dvc/remote/gs.py | 13 +++---- dvc/remote/hdfs.py | 12 +++---- dvc/remote/oss.py | 11 +++--- dvc/remote/s3.py | 9 ++--- dvc/remote/ssh/__init__.py | 9 ++--- tests/unit/remote/test_base.py | 4 +-- 8 files changed, 67 insertions(+), 68 deletions(-) diff --git a/dvc/remote/azure.py b/dvc/remote/azure.py index e891c345dd..aec5ffa63c 100644 --- a/dvc/remote/azure.py +++ b/dvc/remote/azure.py @@ -10,12 +10,6 @@ from dvc.scheme import Schemes -try: - from azure.storage.blob import BlockBlobService, BlobPermissions - from azure.common import AzureMissingResourceHttpError -except ImportError: - BlockBlobService = None - from dvc.utils.compat import urlparse from dvc.progress import Tqdm from dvc.config import Config @@ -35,7 +29,7 @@ class RemoteAZURE(RemoteBASE): r"(ContainerName=(?P[^;]+);?)?" r"(?P.+)?)?)$" ) - REQUIRES = {"azure-storage-blob": BlockBlobService} + REQUIRES = {"azure-storage-blob": "azure.storage.blob"} PARAM_CHECKSUM = "etag" COPY_POLL_SECONDS = 5 @@ -72,6 +66,9 @@ def __init__(self, repo, config): @cached_property def blob_service(self): + from azure.storage.blob import BlockBlobService + from azure.common import AzureMissingResourceHttpError + logger.debug("URL {}".format(self.path_info)) logger.debug("Connection string {}".format(self.connection_string)) blob_service = BlockBlobService( @@ -139,6 +136,8 @@ def exists(self, path_info): return any(path_info.path == path for path in paths) def _generate_download_url(self, path_info, expires=3600): + from azure.storage.blob import BlobPermissions + expires_at = datetime.utcnow() + timedelta(seconds=expires) sas_token = self.blob_service.generate_blob_shared_access_signature( diff --git a/dvc/remote/base.py b/dvc/remote/base.py index b732d10962..41aad4c93d 100644 --- a/dvc/remote/base.py +++ b/dvc/remote/base.py @@ -86,31 +86,8 @@ class RemoteBASE(object): def __init__(self, repo, config): self.repo = repo - deps_ok = all(self.REQUIRES.values()) - if not deps_ok: - missing = [k for k, v in self.REQUIRES.items() if v is None] - url = config.get( - Config.SECTION_REMOTE_URL, "{}://".format(self.scheme) - ) - msg = ( - "URL '{}' is supported but requires these missing " - "dependencies: {}. If you have installed dvc using pip, " - "choose one of these options to proceed: \n" - "\n" - " 1) Install specific missing dependencies:\n" - " pip install {}\n" - " 2) Install dvc package that includes those missing " - "dependencies: \n" - " pip install 'dvc[{}]'\n" - " 3) Install dvc package with all possible " - "dependencies included: \n" - " pip install 'dvc[all]'\n" - "\n" - "If you have installed dvc from a binary package and you " - "are still seeing this message, please report it to us " - "using https://github.com/iterative/dvc/issues. Thank you!" - ).format(url, missing, " ".join(missing), self.scheme) - raise RemoteMissingDepsError(msg) + + self._check_requires(config) core = config.get(Config.SECTION_CORE, {}) self.checksum_jobs = core.get( @@ -130,6 +107,43 @@ def __init__(self, repo, config): self.cache_types = copy(self.DEFAULT_CACHE_TYPES) self.cache_type_confirmed = False + def _check_requires(self, config): + import importlib + + missing = [] + + for package, module in self.REQUIRES.items(): + try: + importlib.import_module(module) + except ImportError: + missing.append(package) + + if not missing: + return + + url = config.get( + Config.SECTION_REMOTE_URL, "{}://".format(self.scheme) + ) + msg = ( + "URL '{}' is supported but requires these missing " + "dependencies: {}. If you have installed dvc using pip, " + "choose one of these options to proceed: \n" + "\n" + " 1) Install specific missing dependencies:\n" + " pip install {}\n" + " 2) Install dvc package that includes those missing " + "dependencies: \n" + " pip install 'dvc[{}]'\n" + " 3) Install dvc package with all possible " + "dependencies included: \n" + " pip install 'dvc[all]'\n" + "\n" + "If you have installed dvc from a binary package and you " + "are still seeing this message, please report it to us " + "using https://github.com/iterative/dvc/issues. Thank you!" + ).format(url, missing, " ".join(missing), self.scheme) + raise RemoteMissingDepsError(msg) + def __repr__(self): return "{class_name}: '{path_info}'".format( class_name=type(self).__name__, diff --git a/dvc/remote/gs.py b/dvc/remote/gs.py index 5ef730dd0e..9c49377516 100644 --- a/dvc/remote/gs.py +++ b/dvc/remote/gs.py @@ -5,11 +5,6 @@ from funcy import cached_property from dvc.utils.compat import FileNotFoundError -try: - from google.cloud import storage -except ImportError: - storage = None - from dvc.remote.base import RemoteBASE from dvc.config import Config from dvc.exceptions import DvcException @@ -22,7 +17,7 @@ class RemoteGS(RemoteBASE): scheme = Schemes.GS path_cls = CloudURLInfo - REQUIRES = {"google.cloud.storage": storage} + REQUIRES = {"google-cloud-storage": "google.cloud.storage"} PARAM_CHECKSUM = "md5" def __init__(self, repo, config): @@ -37,10 +32,12 @@ def __init__(self, repo, config): @cached_property def gs(self): + from google.cloud.storage import Client + return ( - storage.Client.from_service_account_json(self.credentialpath) + Client.from_service_account_json(self.credentialpath) if self.credentialpath - else storage.Client(self.projectname) + else Client(self.projectname) ) def get_file_checksum(self, path_info): diff --git a/dvc/remote/hdfs.py b/dvc/remote/hdfs.py index efdbfbba4f..927737a9dc 100644 --- a/dvc/remote/hdfs.py +++ b/dvc/remote/hdfs.py @@ -9,11 +9,6 @@ from subprocess import Popen, PIPE from contextlib import contextmanager, closing -try: - import pyarrow -except ImportError: - pyarrow = None - from dvc.config import Config from dvc.scheme import Schemes @@ -30,7 +25,7 @@ class RemoteHDFS(RemoteBASE): scheme = Schemes.HDFS REGEX = r"^hdfs://((?P.*)@)?.*$" PARAM_CHECKSUM = "checksum" - REQUIRES = {"pyarrow": pyarrow} + REQUIRES = {"pyarrow": "pyarrow"} def __init__(self, repo, config): super(RemoteHDFS, self).__init__(repo, config) @@ -55,8 +50,9 @@ def __init__(self, repo, config): path=parsed.path, ) - @staticmethod - def hdfs(path_info): + def hdfs(self, path_info): + import pyarrow + return get_connection( pyarrow.hdfs.connect, path_info.host, diff --git a/dvc/remote/oss.py b/dvc/remote/oss.py index ca6d5a4e0d..185c7b7a84 100644 --- a/dvc/remote/oss.py +++ b/dvc/remote/oss.py @@ -6,11 +6,6 @@ from dvc.scheme import Schemes -try: - import oss2 -except ImportError: - oss2 = None - from dvc.config import Config from dvc.remote.base import RemoteBASE from dvc.progress import Tqdm @@ -41,7 +36,7 @@ class RemoteOSS(RemoteBASE): scheme = Schemes.OSS path_cls = CloudURLInfo - REQUIRES = {"oss2": oss2} + REQUIRES = {"oss2": "oss2"} PARAM_CHECKSUM = "etag" COPY_POLL_SECONDS = 5 @@ -71,6 +66,8 @@ def __init__(self, repo, config): @property def oss_service(self): + import oss2 + if self._bucket is None: logger.debug("URL {}".format(self.path_info)) logger.debug("key id {}".format(self.key_id)) @@ -98,6 +95,8 @@ def remove(self, path_info): self.oss_service.delete_object(path_info.path) def _list_paths(self, prefix): + import oss2 + for blob in oss2.ObjectIterator(self.oss_service, prefix=prefix): yield blob.key diff --git a/dvc/remote/s3.py b/dvc/remote/s3.py index 7de02f152d..51f07f94df 100644 --- a/dvc/remote/s3.py +++ b/dvc/remote/s3.py @@ -4,11 +4,6 @@ import logging from funcy import cached_property -try: - import boto3 -except ImportError: - boto3 = None - from dvc.progress import Tqdm from dvc.config import Config from dvc.remote.base import RemoteBASE @@ -22,7 +17,7 @@ class RemoteS3(RemoteBASE): scheme = Schemes.S3 path_cls = CloudURLInfo - REQUIRES = {"boto3": boto3} + REQUIRES = {"boto3": "boto3"} PARAM_CHECKSUM = "etag" def __init__(self, repo, config): @@ -61,6 +56,8 @@ def __init__(self, repo, config): @cached_property def s3(self): + import boto3 + session = boto3.session.Session( profile_name=self.profile, region_name=self.region ) diff --git a/dvc/remote/ssh/__init__.py b/dvc/remote/ssh/__init__.py index e4cc4cac56..75a2a6f980 100644 --- a/dvc/remote/ssh/__init__.py +++ b/dvc/remote/ssh/__init__.py @@ -10,11 +10,6 @@ from concurrent.futures import ThreadPoolExecutor from contextlib import contextmanager, closing -try: - import paramiko -except ImportError: - paramiko = None - import dvc.prompt as prompt from dvc.config import Config from dvc.utils import to_chunks @@ -33,7 +28,7 @@ class RemoteSSH(RemoteBASE): scheme = Schemes.SSH - REQUIRES = {"paramiko": paramiko} + REQUIRES = {"paramiko": "paramiko"} JOBS = 4 PARAM_CHECKSUM = "md5" @@ -93,6 +88,8 @@ def ssh_config_filename(): @staticmethod def _load_user_ssh_config(hostname): + import paramiko + user_config_file = RemoteSSH.ssh_config_filename() user_ssh_config = {} if hostname and os.path.exists(user_config_file): diff --git a/tests/unit/remote/test_base.py b/tests/unit/remote/test_base.py index ee9c9835d2..e42c05e25a 100644 --- a/tests/unit/remote/test_base.py +++ b/tests/unit/remote/test_base.py @@ -10,8 +10,8 @@ class TestRemoteBASE(object): class TestMissingDeps(TestCase, TestRemoteBASE): def test(self): - REQUIRES = {"foo": None, "bar": None, "mock": mock} - with mock.patch.object(self.REMOTE_CLS, "REQUIRES", REQUIRES): + requires = {"missing": "missing"} + with mock.patch.object(self.REMOTE_CLS, "REQUIRES", requires): with self.assertRaises(RemoteMissingDepsError): self.REMOTE_CLS(None, {})