This repository has been archived by the owner on Sep 13, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor github and gitlab resolvers, implement bitbucket (#287)
* gitlab fs WIP * add gitlab support for uri's * bitbucket WIP * implement bitbucket resolver tests refactor github, gitlab and bitbucket resolvers * implement bitbucket resolver tests refactor github, gitlab and bitbucket resolvers * fix lint * add secret envs
- Loading branch information
Showing
13 changed files
with
628 additions
and
287 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,272 @@ | ||
import posixpath | ||
from typing import ClassVar, List, Optional | ||
from urllib.parse import quote_plus, urljoin, urlparse, urlsplit | ||
|
||
import requests | ||
from fsspec import AbstractFileSystem | ||
from fsspec.implementations.memory import MemoryFile | ||
from fsspec.registry import known_implementations | ||
from pydantic import Field | ||
from requests import HTTPError | ||
|
||
from mlem.config import MlemConfigBase | ||
from mlem.core.meta_io import CloudGitResolver | ||
|
||
# Base URL of Bitbucket Cloud; used as the default host and as a URI prefix.
BITBUCKET_ORG = "https://bitbucket.org"
|
||
|
||
class BitbucketWrapper:
    """Small client for the Bitbucket HTTP endpoints used to browse a repo.

    Every request is sent with HTTP basic auth when both ``username`` and
    ``password`` are set, and anonymously otherwise.
    """

    tree_endpoint = "/api/internal/repositories/{repo}/tree/{rev}/{path}"
    repo_endpoint = "/api/2.0/repositories/{repo}"
    refs_endpoint = "/api/2.0/repositories/{repo}/refs"
    file_endpoint = "/api/2.0/repositories/{repo}/src/{rev}/{path}"

    def __init__(
        self, url: str, username: Optional[str], password: Optional[str]
    ):
        """Store the host ``url`` and the optional credentials."""
        self.username = username
        self.password = password
        self.url = url

    @property
    def auth(self):
        """Return ``(username, password)`` if both are set, else ``None``."""
        if self.username is not None and self.password is not None:
            return self.username, self.password
        return None

    def _endpoint_url(self, endpoint: str, **params) -> str:
        """Fill ``endpoint`` with ``params`` and resolve it against the host."""
        return urljoin(self.url, endpoint.format(**params))

    def tree(self, path: str, repo: str, rev: str):
        """Return the directory entries of ``path`` in ``repo`` at ``rev``.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(
            self._endpoint_url(
                self.tree_endpoint, path=path or "", repo=repo, rev=rev
            ),
            auth=self.auth,
        )
        r.raise_for_status()
        return r.json()[0]["contents"]

    def get_default_branch(self, repo: str):
        """Return the name of the main branch of ``repo``.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(
            self._endpoint_url(self.repo_endpoint, repo=repo),
            auth=self.auth,
        )
        r.raise_for_status()
        return r.json()["mainbranch"]["name"]

    def open(self, path: str, repo: str, rev: str):
        """Return the raw bytes of the file ``path`` in ``repo`` at ``rev``.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(
            self._endpoint_url(
                self.file_endpoint, path=path, repo=repo, rev=rev
            ),
            auth=self.auth,
        )
        r.raise_for_status()
        return r.content

    def get_refs(self, repo: str) -> List[str]:
        """Return the names of all refs of ``repo``.

        The listing is read page by page: as long as a response carries a
        ``next`` link, that link is requested too, so refs beyond the first
        page are not silently dropped.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        names: List[str] = []
        next_url: Optional[str] = self._endpoint_url(
            self.refs_endpoint, repo=repo
        )
        while next_url:
            r = requests.get(next_url, auth=self.auth)
            r.raise_for_status()
            page = r.json()
            names.extend(v["name"] for v in page["values"])
            next_url = page.get("next")
        return names

    def check_rev(self, repo: str, rev: str) -> bool:
        """Return ``True`` if a HEAD request for the root of ``rev`` gives 200."""
        r = requests.head(
            self._endpoint_url(
                self.file_endpoint, path="", repo=repo, rev=rev
            ),
            # Credentials are needed here as well, otherwise revisions of
            # private repositories can never be confirmed.
            auth=self.auth,
        )
        return r.status_code == 200
|
||
|
||
class BitbucketConfig(MlemConfigBase):
    """Bitbucket credentials; the config section name is ``bitbucket``."""

    class Config:
        section = "bitbucket"

    # Both fields default to None and name the environment variable they
    # are bound to via ``env``.
    USERNAME: Optional[str] = Field(None, env="BITBUCKET_USERNAME")
    PASSWORD: Optional[str] = Field(None, env="BITBUCKET_PASSWORD")
|
||
|
||
class BitBucketFileSystem(
    AbstractFileSystem
):  # pylint: disable=abstract-method
    """Read-only fsspec filesystem over one revision of a Bitbucket repo.

    URLs have the form ``bitbucket://<repo>@<ref>/<path>``. Credentials
    passed explicitly take precedence over those from ``BitbucketConfig``.
    """

    # Same key the class is registered under. The inherited
    # ``_strip_protocol`` only removes a ``<protocol>://`` prefix that matches
    # this attribute, which ``_get_kwargs_from_urls`` relies on.
    protocol = "bitbucket"

    def __init__(
        self,
        repo: str,
        sha: Optional[str] = None,
        host: str = BITBUCKET_ORG,
        username: Optional[str] = None,
        password: Optional[str] = None,
        **kwargs,
    ):
        """Create the filesystem and eagerly list the repository root.

        Args:
            repo: repository identifier used in the API URLs.
            sha: revision to browse; the repo's main branch when ``None``.
            host: base URL of the Bitbucket server.
            username: user name; falls back to ``BitbucketConfig.USERNAME``.
            password: password; falls back to ``BitbucketConfig.PASSWORD``.
            **kwargs: forwarded to ``AbstractFileSystem.__init__``.

        Raises:
            FileNotFoundError: if listing the root returns HTTP 404.
        """
        super().__init__(**kwargs)
        conf = BitbucketConfig.local()
        self.password = password or conf.PASSWORD
        self.username = username or conf.USERNAME
        self.repo = repo
        self.host = host

        self.bb = BitbucketWrapper(host, self.username, self.password)
        if sha is None:
            sha = self.bb.get_default_branch(repo)
        self.root = sha
        # Listing the root up front makes a bad repo/revision fail here.
        self.ls("")

    def invalidate_cache(self, path=None):
        """Drop all cached directory listings."""
        super().invalidate_cache(path)
        self.dircache.clear()

    def ls(self, path, detail=False, sha=None, **kwargs):
        """List the entries of directory ``path``.

        Args:
            path: directory to list, with or without protocol prefix.
            detail: return the entry dicts instead of sorted names.
            sha: revision to list; the filesystem's root revision if ``None``.

        Raises:
            FileNotFoundError: if the server answers with HTTP 404.
        """
        path = self._strip_protocol(path)
        # Only listings of the root revision are cached; any other revision
        # is always fetched and never stored.
        cacheable = sha in (self.root, None)
        if cacheable and path in self.dircache:
            out = self.dircache[path]
        else:
            try:
                r = self.bb.tree(
                    path=path, repo=self.repo, rev=sha or self.root
                )
            except HTTPError as e:
                if e.response.status_code == 404:
                    raise FileNotFoundError(path) from e
                raise
            out = [
                {
                    "name": posixpath.join(path, f["name"]),
                    "mode": None,
                    "type": f["type"],
                    "size": f.get("size", 0),
                    "sha": sha,
                }
                for f in r
            ]
            if cacheable:
                self.dircache[path] = out

        if detail:
            return out
        return sorted([f["name"] for f in out])

    @classmethod
    def _strip_protocol(cls, path):
        """Return the in-repo part of ``path``."""
        if "@" in path:
            return cls._get_kwargs_from_urls(path)["path"]
        return super()._strip_protocol(path)

    @classmethod
    def _get_kwargs_from_urls(cls, path):
        """Split a ``bitbucket://repo@ref/path`` URL into constructor kwargs.

        Anything without the ``bitbucket`` scheme is returned unchanged as
        ``{"path": path}``.
        """
        parsed_path = urlsplit(path)
        protocol = parsed_path.scheme
        if protocol != "bitbucket":
            return {"path": path}
        # maxsplit=1: only the first "@" separates the repo from the rest,
        # so a later "@" inside the path does not break the unpacking.
        repo, path = super()._strip_protocol(path).split("@", maxsplit=1)
        sha, path = _mathch_path_with_ref(repo, path)
        return {
            "path": path,
            "sha": sha,
            "protocol": protocol,
            "repo": repo,
        }

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        sha=None,
        **kwargs,
    ):
        """Download ``path`` and return it as an in-memory file.

        Raises:
            NotImplementedError: for any mode other than ``"rb"``.
        """
        if mode != "rb":
            raise NotImplementedError
        return MemoryFile(
            None,
            None,
            self.bb.open(path, self.repo, rev=sha or self.root),
        )
|
||
|
||
# Register the filesystem class under the "bitbucket" key, identified by its
# dotted "<module>.<class name>" path.
known_implementations.update(
    {
        "bitbucket": {
            "class": f"{BitBucketFileSystem.__module__}.{BitBucketFileSystem.__name__}"
        }
    }
)
|
||
|
||
def ls_bb_refs(repo):
    """Return the ref names of ``repo`` on ``BITBUCKET_ORG``.

    Credentials are taken from the local ``BitbucketConfig``.
    """
    settings = BitbucketConfig.local()
    client = BitbucketWrapper(
        BITBUCKET_ORG,
        username=settings.USERNAME,
        password=settings.PASSWORD,
    )
    return client.get_refs(repo)
|
||
|
||
def _mathch_path_with_ref(repo, path):
    """Split ``<ref>/<path>`` into the ref and the path inside the repo.

    Ref names may contain slashes, so leading path segments are joined one at
    a time (with ``%2F``, i.e. a URL-quoted slash) until the candidate equals
    one of the repository's URL-quoted ref names.

    Args:
        repo: repository whose refs are looked up via ``ls_bb_refs``.
        path: slash-separated string starting with the ref name.

    Returns:
        Tuple ``(ref, rest)`` where ``ref`` is URL-quoted and ``rest`` is the
        remaining path, ``""`` when nothing follows the ref.

    Raises:
        ValueError: if no prefix of ``path`` matches a known ref.
    """
    parts = path.split("/")
    sha = parts[0]
    branches = {quote_plus(ref) for ref in ls_bb_refs(repo)}
    # The trailing "" gives one extra iteration, so the candidate built from
    # all segments is still checked when nothing follows the ref.
    for consumed, part in enumerate(parts[1:] + [""], start=1):
        if sha in branches:
            rest = parts[consumed:]
            break
        sha = f"{sha}%2F{part}"
    else:
        raise ValueError(f'Could not resolve branch from path "{path}"')
    # posixpath.join needs at least one argument; an empty remainder means
    # the whole input was the ref.
    return sha, (posixpath.join(*rest) if rest else "")
|
||
|
||
class BitBucketResolver(CloudGitResolver):
    """Resolver for locations on bitbucket.org and ``bitbucket://`` URIs."""

    type: ClassVar = "bitbucket"
    FS = BitBucketFileSystem
    PROTOCOL = "bitbucket"

    # TODO: support on-prem bitbucket (other hosts)
    PREFIXES = [BITBUCKET_ORG, PROTOCOL + "://"]
    versioning_support = True

    @classmethod
    def get_kwargs(cls, uri):
        """Extract filesystem kwargs from a ``<repo>/src/<ref>/<path>`` URI.

        Returns:
            ``{"repo", "path"}`` when the URI has no ``/src/`` part,
            otherwise ``{"repo", "sha", "path"}``.

        Raises:
            ValueError: if the ref cannot be matched against the repo's refs.
        """
        sha: Optional[str]
        parsed = urlparse(uri)
        # maxsplit=1: only the first "/src/" separates the repo from the ref;
        # a directory named "src" inside the repo must stay in the path.
        repo, *rest = parsed.path.strip("/").split("/src/", maxsplit=1)
        if not rest:
            return {"repo": repo, "path": ""}
        sha, path = _mathch_path_with_ref(repo, rest[0])
        return {"repo": repo, "sha": sha, "path": path}

    @classmethod
    def check_rev(cls, options):
        """Return whether ``options["sha"]`` exists in ``options["repo"]``.

        The check runs against ``BITBUCKET_ORG`` with the credentials from
        the local ``BitbucketConfig``.
        """
        conf = BitbucketConfig.local()
        password = conf.PASSWORD
        username = conf.USERNAME
        return BitbucketWrapper(
            BITBUCKET_ORG, username=username, password=password
        ).check_rev(options["repo"], options["sha"])

    @classmethod
    def get_uri(
        cls,
        path: str,
        project: Optional[str],
        rev: Optional[str],
        fs: BitBucketFileSystem,
    ):
        """Build the web URL of ``path`` inside ``project``.

        The revision in the URL is the filesystem's ``root``; ``rev`` is not
        used.
        """
        fullpath = posixpath.join(project or "", path)
        return f"{BITBUCKET_ORG}/{fs.repo}/src/{fs.root}/{fullpath}"

    @classmethod
    def get_project_uri(  # pylint: disable=unused-argument
        cls,
        path: str,
        project: Optional[str],
        rev: Optional[str],
        fs: BitBucketFileSystem,
        uri: str,
    ):
        """Build the web URL of ``project`` at the filesystem's ``root``."""
        return f"{BITBUCKET_ORG}/{fs.repo}/src/{fs.root}/{project or ''}"
Oops, something went wrong.