Skip to content
This repository has been archived by the owner on Sep 13, 2023. It is now read-only.

Refactor github and gitlab resolvers, implement bitbucket #287

Merged
merged 9 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/check-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ jobs:
HEROKU_TEAM: iterative-sandbox
GITHUB_MATRIX_OS: ${{ matrix.os }}
GITHUB_MATRIX_PYTHON: ${{ matrix.python }}
BITBUCKET_USERNAME: ${{ secrets.BITBUCKET_USERNAME }}
BITBUCKET_PASSWORD: ${{ secrets.BITBUCKET_PASSWORD }}
- name: "Upload coverage to Codecov"
uses: codecov/codecov-action@v1
with:
Expand Down
10 changes: 7 additions & 3 deletions mlem/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def inner(settings: BaseSettings) -> Dict[str, Any]:
return inner


T = TypeVar("T", bound="MlemConfigBase")


class MlemConfigBase(BaseSettings):
"""Special base for mlem settings to be able to read them from files"""

Expand Down Expand Up @@ -93,6 +96,10 @@ def ignore_case(
new_value[key] = val
return new_value

@classmethod
def local(cls: Type[T]) -> T:
    """Load this config class via ``project_config`` with an empty project.

    Returns:
        The result of ``project_config`` called with ``section=cls``; the
        annotation declares it to be an instance of ``cls``.
    """
    return project_config(project="", section=cls)


class MlemConfig(MlemConfigBase):
"""Base Mlem Config"""
Expand Down Expand Up @@ -149,9 +156,6 @@ def get_config_cls(section: str) -> Type[MlemConfigBase]:
raise UnknownConfigSection(section) from e


T = TypeVar("T", bound=MlemConfigBase)


@overload
def project_config(
project: Optional[str],
Expand Down
272 changes: 272 additions & 0 deletions mlem/contrib/bitbucketfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
import posixpath
from typing import ClassVar, List, Optional
from urllib.parse import quote_plus, urljoin, urlparse, urlsplit

import requests
from fsspec import AbstractFileSystem
from fsspec.implementations.memory import MemoryFile
from fsspec.registry import known_implementations
from pydantic import Field
from requests import HTTPError

from mlem.config import MlemConfigBase
from mlem.core.meta_io import CloudGitResolver

BITBUCKET_ORG = "https://bitbucket.org"


class BitbucketWrapper:
    """Small HTTP client for the Bitbucket endpoints this module relies on.

    Every request is sent with HTTP basic auth when both a username and a
    password were supplied, and anonymously otherwise.
    """

    tree_endpoint = "/api/internal/repositories/{repo}/tree/{rev}/{path}"
    repo_endpoint = "/api/2.0/repositories/{repo}"
    refs_endpoint = "/api/2.0/repositories/{repo}/refs"
    file_endpoint = "/api/2.0/repositories/{repo}/src/{rev}/{path}"

    def __init__(
        self, url: str, username: Optional[str], password: Optional[str]
    ):
        """Remember the base *url* and the (optional) credentials."""
        self.username = username
        self.password = password
        self.url = url

    @property
    def auth(self):
        """``(username, password)`` when both are set, otherwise ``None``."""
        if self.username is not None and self.password is not None:
            return self.username, self.password
        return None

    def _url(self, endpoint: str, **params) -> str:
        """Fill an endpoint template and resolve it against the base url."""
        return urljoin(self.url, endpoint.format(**params))

    def _get(self, url: str):
        """GET *url* with the configured auth; raise ``HTTPError`` on failure."""
        r = requests.get(url, auth=self.auth)
        r.raise_for_status()
        return r

    def tree(self, path: str, repo: str, rev: str):
        """Return the ``contents`` list of directory *path* at *rev*.

        A falsy *path* is sent as the empty string.
        """
        url = self._url(
            self.tree_endpoint, path=path or "", repo=repo, rev=rev
        )
        return self._get(url).json()[0]["contents"]

    def get_default_branch(self, repo: str):
        """Return the name of the repository's ``mainbranch``."""
        url = self._url(self.repo_endpoint, repo=repo)
        return self._get(url).json()["mainbranch"]["name"]

    def open(self, path: str, repo: str, rev: str):
        """Return the raw bytes of file *path* at *rev*."""
        url = self._url(self.file_endpoint, path=path, repo=repo, rev=rev)
        return self._get(url).content

    def get_refs(self, repo: str) -> List[str]:
        """Return the names of all refs of *repo*.

        The refs endpoint is paginated: each page carries its items under
        ``values`` and, unless it is the last page, the url of the following
        page under ``next``. All pages are followed so that repositories
        with more refs than fit on one page are listed completely.
        """
        names: List[str] = []
        url: Optional[str] = self._url(self.refs_endpoint, repo=repo)
        while url:
            page = self._get(url).json()
            names.extend(ref["name"] for ref in page["values"])
            url = page.get("next")
        return names

    def check_rev(self, repo: str, rev: str) -> bool:
        """Return True if a HEAD request for the root of *rev* answers 200.

        The request carries the same credentials as every other call, so
        revisions of private repositories can be checked as well.
        """
        r = requests.head(
            self._url(self.file_endpoint, path="", repo=repo, rev=rev),
            auth=self.auth,
        )
        return r.status_code == 200


class BitbucketConfig(MlemConfigBase):
    """Config section ``bitbucket`` with optional Bitbucket credentials."""

    class Config:
        section = "bitbucket"

    # Both fields default to None; each one names its environment variable
    # through the ``env`` argument of ``Field``.
    USERNAME: Optional[str] = Field(None, env="BITBUCKET_USERNAME")
    PASSWORD: Optional[str] = Field(None, env="BITBUCKET_PASSWORD")


class BitBucketFileSystem(
    AbstractFileSystem
):  # pylint: disable=abstract-method
    """fsspec filesystem that reads one Bitbucket repository at a revision.

    Only listing (``ls``) and opening files in ``"rb"`` mode are implemented
    here; all remote access goes through a ``BitbucketWrapper``.
    """

    # NOTE(review): no ``protocol`` attribute is declared, so the inherited
    # ``_strip_protocol`` presumably does not strip "bitbucket://" - confirm.

    def __init__(
        self,
        repo: str,
        sha: Optional[str] = None,
        host: str = BITBUCKET_ORG,
        username: Optional[str] = None,
        password: Optional[str] = None,
        **kwargs,
    ):
        """Bind the filesystem to *repo* at revision *sha*.

        Args:
            repo: repository identifier used in the API urls.
            sha: revision to read; the repository's main branch when None.
            host: base url of the Bitbucket instance.
            username: falls back to ``BitbucketConfig.local().USERNAME``.
            password: falls back to ``BitbucketConfig.local().PASSWORD``.
            **kwargs: forwarded to ``AbstractFileSystem.__init__``.
        """
        super().__init__(**kwargs)
        conf = BitbucketConfig.local()
        self.password = password or conf.PASSWORD
        self.username = username or conf.USERNAME
        self.repo = repo
        self.host = host

        self.bb = BitbucketWrapper(host, self.username, self.password)
        if sha is None:
            sha = self.bb.get_default_branch(repo)
        self.root = sha
        # Listing the root right away surfaces an unreachable repo/revision
        # at construction time and fills the directory cache.
        self.ls("")

    def invalidate_cache(self, path=None):
        """Drop all cached listings (the whole cache, regardless of *path*)."""
        super().invalidate_cache(path)
        self.dircache.clear()

    def ls(self, path, detail=False, sha=None, **kwargs):
        """List directory *path*.

        Listings for the filesystem's own revision (``sha`` is None or equal
        to ``self.root``) are served from and stored in ``self.dircache``;
        any other *sha* is always fetched and never cached.

        Returns:
            The list of entry dicts when *detail* is true, otherwise the
            sorted list of entry names.

        Raises:
            FileNotFoundError: when the tree request answers 404.
        """
        path = self._strip_protocol(path)
        if path not in self.dircache or sha not in [self.root, None]:
            try:
                r = self.bb.tree(
                    path=path, repo=self.repo, rev=sha or self.root
                )
            except HTTPError as e:
                if e.response.status_code == 404:
                    raise FileNotFoundError(path) from e
                raise
            out = [
                {
                    "name": posixpath.join(path, f["name"]),
                    "mode": None,
                    "type": f["type"],
                    "size": f.get("size", 0),
                    "sha": sha,
                }
                for f in r
            ]
            if sha in [self.root, None]:
                self.dircache[path] = out
        else:
            out = self.dircache[path]

        if detail:
            return out
        return sorted([f["name"] for f in out])

    @classmethod
    def _strip_protocol(cls, path):
        """Return the in-repo part of *path*.

        Paths containing "@" are parsed by ``_get_kwargs_from_urls``;
        everything else is handled by the parent implementation.
        """
        if "@" in path:
            return cls._get_kwargs_from_urls(path)["path"]
        return super()._strip_protocol(path)

    @classmethod
    def _get_kwargs_from_urls(cls, path):
        """Extract constructor kwargs from a ``bitbucket://repo@ref/path`` url.

        Urls with any other scheme yield just ``{"path": path}``.
        """
        parsed_path = urlsplit(path)
        protocol = parsed_path.scheme
        if protocol != "bitbucket":
            return {"path": path}
        # Split on the first "@" only: the part after the repo may itself
        # contain "@", and exactly two pieces are unpacked here.
        repo, path = super()._strip_protocol(path).split("@", maxsplit=1)
        sha, path = _mathch_path_with_ref(repo, path)
        return {
            "path": path,
            "sha": sha,
            "protocol": protocol,
            "repo": repo,
        }

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        sha=None,
        **kwargs,
    ):
        """Download *path* and return it wrapped in a ``MemoryFile``.

        Raises:
            NotImplementedError: for any *mode* other than ``"rb"``.
        """
        if mode != "rb":
            raise NotImplementedError
        return MemoryFile(
            None,
            None,
            self.bb.open(path, self.repo, rev=sha or self.root),
        )


# Add a "bitbucket" entry to fsspec's ``known_implementations`` mapping whose
# "class" value is the dotted import path of BitBucketFileSystem.
known_implementations["bitbucket"] = {
    "class": ".".join(
        (BitBucketFileSystem.__module__, BitBucketFileSystem.__name__)
    )
}


def ls_bb_refs(repo):
    """Return the ref names of *repo* as reported by ``BITBUCKET_ORG``.

    Credentials are taken from ``BitbucketConfig.local()``.
    """
    settings = BitbucketConfig.local()
    client = BitbucketWrapper(
        BITBUCKET_ORG,
        username=settings.USERNAME,
        password=settings.PASSWORD,
    )
    return client.get_refs(repo)


def _mathch_path_with_ref(repo, path):
    """Split ``<ref>/<path>`` into the ref and the path that follows it.

    Ref names may contain "/", so leading components of *path* are
    accumulated (joined with "%2F") until the accumulated value equals one
    of the repository's refs in ``quote_plus``-encoded form.

    Args:
        repo: repository identifier passed to ``ls_bb_refs``.
        path: "/"-separated string that starts with a ref name.

    Returns:
        Tuple ``(sha, rest)``: the matched ref with "/" written as "%2F",
        and the remaining path ("" when nothing follows the ref).

    Raises:
        ValueError: if no leading part of *path* matches a known ref.
    """
    parts = path.split("/")
    sha = parts[0]
    branches = {quote_plus(ref) for ref in ls_bb_refs(repo)}
    # match beginning of path with one of existing branches
    # the trailing "" adds one extra iteration so that a ref which consumes
    # every component of the path can still be matched
    for i, part in enumerate(parts[1:] + [""], start=1):
        if sha in branches:
            rest = parts[i:]
            break
        sha = f"{sha}%2F{part}"
    else:
        raise ValueError(f'Could not resolve branch from path "{path}"')
    # posixpath.join() requires at least one argument; ``rest`` is empty
    # when the ref was the whole of ``path`` (no trailing slash).
    return sha, (posixpath.join(*rest) if rest else "")


class BitBucketResolver(CloudGitResolver):
    """Resolver for ``https://bitbucket.org/<repo>/src/<ref>/<path>`` uris."""

    type: ClassVar = "bitbucket"
    FS = BitBucketFileSystem
    PROTOCOL = "bitbucket"

    # TODO: support on-prem bitbucket (other hosts)
    PREFIXES = [BITBUCKET_ORG, PROTOCOL + "://"]
    versioning_support = True

    @classmethod
    def get_kwargs(cls, uri):
        """Parse *uri* into the ``repo`` / ``sha`` / ``path`` kwargs.

        Without a ``/src/`` segment only ``repo`` and an empty ``path`` are
        returned (no ``sha`` key).
        """
        sha: Optional[str]
        parsed = urlparse(uri)
        # Split on the first "/src/" only, so a "src" directory inside the
        # repository stays part of the path instead of truncating it.
        repo, *path = parsed.path.strip("/").split("/src/", maxsplit=1)
        if not path:
            return {"repo": repo, "path": ""}
        sha, path = _mathch_path_with_ref(repo, path[0])
        return {"repo": repo, "sha": sha, "path": path}

    @classmethod
    def check_rev(cls, options):
        """Return whether ``options["sha"]`` exists in ``options["repo"]``.

        Uses the credentials from ``BitbucketConfig.local()`` against
        ``BITBUCKET_ORG``.
        """
        conf = BitbucketConfig.local()
        password = conf.PASSWORD
        username = conf.USERNAME
        return BitbucketWrapper(
            BITBUCKET_ORG, username=username, password=password
        ).check_rev(options["repo"], options["sha"])

    @classmethod
    def get_uri(
        cls,
        path: str,
        project: Optional[str],
        rev: Optional[str],
        fs: BitBucketFileSystem,
    ):
        """Build the web uri of *path* inside *project*.

        The revision is taken from ``fs.root``; *rev* is not consulted.
        """
        fullpath = posixpath.join(project or "", path)
        return f"{BITBUCKET_ORG}/{fs.repo}/src/{fs.root}/{fullpath}"

    @classmethod
    def get_project_uri(  # pylint: disable=unused-argument
        cls,
        path: str,
        project: Optional[str],
        rev: Optional[str],
        fs: BitBucketFileSystem,
        uri: str,
    ):
        """Build the web uri of *project* at revision ``fs.root``.

        *path*, *rev* and *uri* are accepted but unused.
        """
        return f"{BITBUCKET_ORG}/{fs.repo}/src/{fs.root}/{project or ''}"
Loading