forked from iterative/dvc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
tree,remote: add support for WebDAV (iterative#4256)
* tree,remote: add support for webdav Webdav support is based on https://pypi.org/project/webdavclient3/ and supports basic download/upload operation, directory creation as well as existence, file hash and isdir query. Copy, move and remove are also implemented, though probably not used yet. WebdavURLInfo is taken from https://github.com/shizacat/dvc/tree/remote-webdav Fixes iterative#1153 * tree,remote: add further webdavclient3 options Webdav token auth, certificate and key path and connection timeout are configurable. Webdav username might be specified or extracted from URL. Refs iterative#1153 * tree,remote: validate webdav client configuration Refs iterative#1153 * tree,remote: WebDAV is written with capitalized 'DAV'... Refs iterative#1153 * tree,remote: terminate WebDAV makedirs at self.path_info.path This enables the WebDAV api location (e.g. '/public.php/webdav') to be part of the remote 'url' configuration instead of being specified separately via the 'root' option. The 'root' option may then be used to specify real directories at the WebDAV storage, although using it to set the api location is still possible. Refs iterative#1153 * tree,remote: use >=3.14.5 for WebDAV dependency webdavclient3 Context: iterative#4256 (comment) Refs iterative#1153 * tree,remote: get rid of WebDAV 'root' option and add connection check The WebDAV 'root' option was rather confusing and should be handled by the initial 'path_info' from the config 'url' option. Context: iterative#4256 (comment) While stripping the path/root from the hostname the port got lost, which is fixed now by simply using the URLInfo 'replace' method as suggested. Context: iterative#4256 (comment) The WebDAV client connection is tested by probing the existence of the root (self.path_info.path). 
Refs iterative#1153 * tree,remote: implement walk_files for WebDAV Context: iterative#4256 (comment) Refs: iterative#1153 * tree,remote: let WebDAV client list query file info in walk_files Context: iterative#4256 (comment) Refs iterative#1153 * tree,remote: add some unit tests for WebDAVTree Refs iterative#1153 * tree,remote: use ConfigError for WebDAVTree and move Error to webdav.py Context: iterative#4256 (comment) Refs iterative#1153 * tree,remote: remove/change some (unnecessary) comments Context: iterative#4256 (comment) * tree,remote: uploading to WebDAV only create directories if not exist * tree,remote: add BaseTree parameter use_dvcignore to WebDAVTree exists Refs iterative/iterative#1153 * tree,remote: remove WebDAVTree copy method as proposed Context: iterative#4256 (comment) Refs iterative#1153 * tree,remote: add progress bar to WebDAV _download and _upload method Context: iterative#4256 (comment) Refs iterative#1153 Co-authored-by: Christoph Berganski <[email protected]>
- Loading branch information
Showing
8 changed files
with
337 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,5 @@ class Schemes: | |
GDRIVE = "gdrive" | ||
LOCAL = "local" | ||
OSS = "oss" | ||
WEBDAV = "webdav" | ||
WEBDAVS = "webdavs" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,253 @@ | ||
import logging | ||
import os | ||
import threading | ||
from collections import deque | ||
|
||
from funcy import cached_property, wrap_prop | ||
|
||
from dvc.config import ConfigError | ||
from dvc.exceptions import DvcException | ||
from dvc.path_info import HTTPURLInfo, WebDAVURLInfo | ||
from dvc.progress import Tqdm | ||
from dvc.scheme import Schemes | ||
|
||
from .base import BaseTree | ||
from .http import ask_password | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class WebDAVConnectionError(DvcException):
    """Raised when the connection check against a WebDAV host fails."""

    def __init__(self, host):
        # Build the message first so the constructor call stays short
        message = f"Unable to connect to WebDAV {host}."
        super().__init__(message)
|
||
|
||
class WebDAVTree(BaseTree):  # pylint:disable=abstract-method
    """Tree implementation that talks to a WebDAV server via ``webdav3.client``.

    Connection settings are read from the remote configuration in
    ``__init__``; the actual client is created lazily on first use.
    """

    # Use webdav scheme
    scheme = Schemes.WEBDAV

    # URLInfo for WebDAV ~ replaces webdav -> http
    PATH_CLS = WebDAVURLInfo

    # Traversable as walk_files is implemented
    CAN_TRAVERSE = True

    # Length of walk_files prefix
    TRAVERSE_PREFIX_LEN = 2

    # Implementation based on webdav3.client
    REQUIRES = {"webdavclient3": "webdav3.client"}

    # Chunk size for buffered upload/download with progress bar
    CHUNK_SIZE = 2 ** 16

    def __init__(self, repo, config):
        """Read the WebDAV connection settings from *config*.

        Args:
            repo: passed through unchanged to the ``BaseTree`` constructor.
            config: mapping of remote options. The keys read here are
                ``url``, ``user``, ``password``, ``ask_password``, ``token``,
                ``cert_path``, ``key_path`` and ``timeout``.
        """
        super().__init__(repo, config)

        # Username (might be None ~ not set)
        self.user = config.get("user", None)

        # Password (might be None ~ not set)
        self.password = config.get("password", None)

        # Whether to ask for the password if it is not set
        self.ask_password = config.get("ask_password", False)

        # Token for WebDAV auth
        self.token = config.get("token", None)

        # Path to certificate
        self.cert_path = config.get("cert_path", None)

        # Path to private key
        self.key_path = config.get("key_path", None)

        # Connection timeout
        self.timeout = config.get("timeout", 30)

        # URL of the remote
        self.url = config.get("url", None)

        # Without a URL there is nothing to parse
        if self.url:
            self.path_info = self.PATH_CLS(self.url)

            # If username not specified try to use the one from the URL
            if self.user is None and self.path_info.user is not None:
                self.user = self.path_info.user

            # If username specified add it to path_info
            if self.user is not None:
                self.path_info.user = self.user
        else:
            self.path_info = None

    @wrap_prop(threading.Lock())
    @cached_property
    def _client(self):
        """Create, validate and return the ``webdav3`` client.

        Raises:
            ConfigError: if the client reports its options as invalid.
            WebDAVConnectionError: if probing ``self.path_info.path`` fails.
        """
        from webdav3.client import Client

        # Construct hostname from path_info by stripping path
        http_info = HTTPURLInfo(self.path_info.url)
        hostname = http_info.replace(path="").url

        # Ask for the password only if neither password nor token is set
        if self.ask_password and self.password is None and self.token is None:
            host, user = self.path_info.host, self.path_info.user
            self.password = ask_password(host, user)

        # Setup webdav client options dictionary
        options = {
            "webdav_hostname": hostname,
            "webdav_login": self.user,
            "webdav_password": self.password,
            "webdav_token": self.token,
            "webdav_cert_path": self.cert_path,
            "webdav_key_path": self.key_path,
            "webdav_timeout": self.timeout,
            "webdav_chunk_size": self.CHUNK_SIZE,
        }

        client = Client(options)

        # Check whether client options are valid
        if not client.valid():
            raise ConfigError(
                f"Configuration for WebDAV {hostname} is invalid."
            )

        # Check whether connection is valid (root should always exist)
        if not client.check(self.path_info.path):
            raise WebDAVConnectionError(hostname)

        return client

    def exists(self, path_info, use_dvcignore=True):
        """Return the result of the client's ``check`` for ``path_info.path``.

        ``use_dvcignore`` is accepted for signature compatibility and is not
        used here.
        """
        return self._client.check(path_info.path)

    def get_file_hash(self, path_info):
        """Return the ETag of the remote file with surrounding quotes removed.

        Raises:
            DvcException: if the ETag is missing/empty or is a weak ETag.
        """
        # The info dictionary may carry no ETag value at all; fall back to an
        # empty string so the DvcException below is raised instead of an
        # AttributeError from calling strip() on None.
        raw_etag = self._client.info(path_info.path).get("etag")
        etag = (raw_etag or "").strip('"')

        # From HTTPTree
        if not etag:
            raise DvcException(
                "could not find an ETag or "
                "Content-MD5 header for '{url}'".format(url=path_info.url)
            )

        if etag.startswith("W/"):
            raise DvcException(
                "Weak ETags are not supported."
                " (Etag: '{etag}', URL: '{url}')".format(
                    etag=etag, url=path_info.url
                )
            )

        return etag

    def isdir(self, path_info):
        """Return the result of the client's ``is_dir`` for ``path_info.path``."""
        return self._client.is_dir(path_info.path)

    def walk_files(self, path_info, **kwargs):
        """Yield a path info for every non-directory entry below *path_info*.

        Nothing is yielded if *path_info* does not exist. Extra keyword
        arguments are accepted but not used.
        """
        if not self.exists(path_info):
            return

        # Directories still to be listed
        dirs = deque([path_info.path])

        while dirs:
            # pop() takes the most recently appended directory
            for entry in self._client.list(dirs.pop(), get_info=True):
                # Construct path_info to entry
                info = path_info.replace(path=entry["path"])

                if entry["isdir"]:
                    # Remember the directory so its content gets listed later
                    dirs.append(info.path)
                else:
                    yield info

    def remove(self, path_info):
        """Remove the file/directory using the client's ``clean`` (DELETE)."""
        self._client.clean(path_info.path)

    def makedirs(self, path_info):
        """Create *path_info* and any missing parents below the tree root."""
        # Terminate recursion at the tree root or at an existing entry
        if path_info.path == self.path_info.path or self.exists(path_info):
            return

        # Make sure all parent directories exist first
        self.makedirs(path_info.parent)

        # Create the directory at the current recursion depth
        self._client.mkdir(path_info.path)

    def move(self, from_info, to_info, mode=None):
        """Move a file/directory at the remote; ``mode`` is not used."""
        self._client.move(from_info.path, to_info.path)

    def _download(self, from_info, to_file, name=None, no_progress_bar=False):
        """Download *from_info* into the local file *to_file*.

        The remote size is only queried when a progress bar is shown.
        """
        # Progress from HTTPTree
        with open(to_file, "wb") as fd:
            with Tqdm.wrapattr(
                fd,
                "write",
                total=None if no_progress_bar else self._file_size(from_info),
                leave=False,
                desc=from_info.url if name is None else name,
                disable=no_progress_bar,
            ) as fd_wrapped:
                # Download from WebDAV via buffer
                self._client.download_from(
                    buff=fd_wrapped, remote_path=from_info.path
                )

    def _upload(self, from_file, to_info, name=None, no_progress_bar=False):
        """Upload the local file *from_file* to *to_info*."""
        # First try to create parent directories
        self.makedirs(to_info.parent)

        # Progress from HTTPTree
        def chunks():
            with open(from_file, "rb") as fd:
                with Tqdm.wrapattr(
                    fd,
                    "read",
                    total=None
                    if no_progress_bar
                    else os.path.getsize(from_file),
                    leave=False,
                    desc=to_info.url if name is None else name,
                    disable=no_progress_bar,
                ) as fd_wrapped:
                    while True:
                        chunk = fd_wrapped.read(self.CHUNK_SIZE)
                        if not chunk:
                            break
                        yield chunk

        # Upload to WebDAV via buffer
        self._client.upload_to(buff=chunks(), remote_path=to_info.path)

    def _file_size(self, path_info):
        """Return the remote file size as an int."""
        # Get file size from info dictionary and convert to int (from str)
        return int(self._client.info(path_info.path)["size"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from dvc.scheme import Schemes | ||
|
||
from .webdav import WebDAVTree | ||
|
||
|
||
class WebDAVSTree(WebDAVTree):  # pylint:disable=abstract-method
    """Same tree as ``WebDAVTree``, registered for the ``webdavs`` scheme."""

    # Only the scheme differs from the parent class
    scheme = Schemes.WEBDAVS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
from dvc.tree.webdav import WebDAVTree | ||
|
||
# Shared configuration values used by the tests below
user = "username"
password = "password"
url = "webdavs://example.com/public.php/webdav"
# Same remote as ``url`` but with the username embedded
userurl = f"webdavs://{user}@example.com/public.php/webdav"
|
||
|
||
# Test minimum required configuration (url) | ||
def test_init(dvc):
    """A config holding only ``url`` yields a tree whose path_info equals it."""
    tree = WebDAVTree(dvc, {"url": url})

    assert tree.path_info == url
|
||
|
||
# Test username from configuration | ||
def test_user(dvc):
    """The ``user`` option is stored on the tree and on its path_info."""
    tree = WebDAVTree(dvc, {"url": url, "user": user})

    assert tree.user == user
    assert tree.path_info.user == user
|
||
|
||
# Test username extraction from url | ||
def test_userurl(dvc):
    """A username embedded in the URL is picked up when ``user`` is not set."""
    tree = WebDAVTree(dvc, {"url": userurl})

    assert tree.path_info == userurl
    assert tree.user == user
    assert tree.path_info.user == user
|
||
|
||
# test password from config | ||
def test_password(dvc):
    """The ``password`` option is stored on the tree unchanged."""
    tree = WebDAVTree(dvc, {"url": url, "user": user, "password": password})

    assert tree.password == password