Skip to content
This repository has been archived by the owner on Sep 13, 2023. It is now read-only.

Commit

Permalink
Refactor github and gitlab resolvers, implement bitbucket (#287)
Browse files Browse the repository at this point in the history
* gitlab fs WIP

* add gitlab support for uri's

* bitbucket WIP

* implement bitbucket resolver
tests

refactor github, gitlab and bitbucket resolvers

* implement bitbucket resolver
tests

refactor github, gitlab and bitbucket resolvers

* fix lint

* add secret envs
  • Loading branch information
mike0sv authored Jun 20, 2022
1 parent 9b724af commit 482855a
Show file tree
Hide file tree
Showing 13 changed files with 628 additions and 287 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/check-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ jobs:
HEROKU_TEAM: iterative-sandbox
GITHUB_MATRIX_OS: ${{ matrix.os }}
GITHUB_MATRIX_PYTHON: ${{ matrix.python }}
BITBUCKET_USERNAME: ${{ secrets.BITBUCKET_USERNAME }}
BITBUCKET_PASSWORD: ${{ secrets.BITBUCKET_PASSWORD }}
- name: "Upload coverage to Codecov"
uses: codecov/codecov-action@v1
with:
Expand Down
10 changes: 7 additions & 3 deletions mlem/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ def inner(settings: BaseSettings) -> Dict[str, Any]:
return inner


T = TypeVar("T", bound="MlemConfigBase")


class MlemConfigBase(BaseSettings):
"""Special base for mlem settings to be able to read them from files"""

Expand Down Expand Up @@ -93,6 +96,10 @@ def ignore_case(
new_value[key] = val
return new_value

@classmethod
def local(cls: Type[T]) -> T:
    """Return this config section as produced by ``project_config``.

    Delegates to ``project_config`` with an empty project string and this
    class as the ``section`` argument.
    """
    # NOTE(review): the empty project string presumably selects the
    # current/local project -- confirm against ``project_config``.
    return project_config("", section=cls)


class MlemConfig(MlemConfigBase):
"""Base Mlem Config"""
Expand Down Expand Up @@ -149,9 +156,6 @@ def get_config_cls(section: str) -> Type[MlemConfigBase]:
raise UnknownConfigSection(section) from e


T = TypeVar("T", bound=MlemConfigBase)


@overload
def project_config(
project: Optional[str],
Expand Down
272 changes: 272 additions & 0 deletions mlem/contrib/bitbucketfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,272 @@
import posixpath
from typing import ClassVar, List, Optional
from urllib.parse import quote_plus, urljoin, urlparse, urlsplit

import requests
from fsspec import AbstractFileSystem
from fsspec.implementations.memory import MemoryFile
from fsspec.registry import known_implementations
from pydantic import Field
from requests import HTTPError

from mlem.config import MlemConfigBase
from mlem.core.meta_io import CloudGitResolver

BITBUCKET_ORG = "https://bitbucket.org"


class BitbucketWrapper:
    """Small HTTP client for the Bitbucket endpoints this module relies on.

    Every request is sent with ``requests`` and carries basic-auth
    credentials whenever both a username and a password are configured.

    Args:
        url: base URL of the Bitbucket host the endpoints are joined onto.
        username: account name for basic auth, or ``None``.
        password: password for basic auth, or ``None``.
    """

    tree_endpoint = "/api/internal/repositories/{repo}/tree/{rev}/{path}"
    repo_endpoint = "/api/2.0/repositories/{repo}"
    refs_endpoint = "/api/2.0/repositories/{repo}/refs"
    file_endpoint = "/api/2.0/repositories/{repo}/src/{rev}/{path}"

    def __init__(
        self, url: str, username: Optional[str], password: Optional[str]
    ):
        self.username = username
        self.password = password
        self.url = url

    @property
    def auth(self):
        """Return ``(username, password)``, or ``None`` if either is unset."""
        if self.username is not None and self.password is not None:
            return self.username, self.password
        return None

    def _build_url(self, endpoint: str, **params: str) -> str:
        """Fill ``endpoint``'s placeholders and join it onto the base URL."""
        return urljoin(self.url, endpoint.format(**params))

    def tree(self, path: str, repo: str, rev: str):
        """Return the ``contents`` list of directory ``path`` at ``rev``.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(
            self._build_url(
                self.tree_endpoint, path=path or "", repo=repo, rev=rev
            ),
            auth=self.auth,
        )
        r.raise_for_status()
        return r.json()[0]["contents"]

    def get_default_branch(self, repo: str):
        """Return the name of the repository's main branch.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(
            self._build_url(self.repo_endpoint, repo=repo),
            auth=self.auth,
        )
        r.raise_for_status()
        return r.json()["mainbranch"]["name"]

    def open(self, path: str, repo: str, rev: str):
        """Download file ``path`` at ``rev`` and return its raw bytes.

        Raises:
            requests.HTTPError: if the server answers with an error status.
        """
        r = requests.get(
            self._build_url(
                self.file_endpoint, path=path, repo=repo, rev=rev
            ),
            auth=self.auth,
        )
        r.raise_for_status()
        return r.content

    def get_refs(self, repo: str) -> List[str]:
        """Return the names of all refs of ``repo``.

        The refs endpoint is paginated, so the ``next`` link of each page is
        followed until it is absent; reading only the first page would hide
        refs of repositories that have more than one page of them.

        Raises:
            requests.HTTPError: if any page request fails.
        """
        names: List[str] = []
        url: Optional[str] = self._build_url(self.refs_endpoint, repo=repo)
        while url:
            r = requests.get(url, auth=self.auth)
            r.raise_for_status()
            page = r.json()
            names.extend(v["name"] for v in page["values"])
            url = page.get("next")
        return names

    def check_rev(self, repo: str, rev: str) -> bool:
        """Return True if a HEAD request for the root of ``rev`` yields 200."""
        r = requests.head(
            self._build_url(self.file_endpoint, path="", repo=repo, rev=rev),
            # Credentials are required here as well, otherwise every
            # revision of a private repository looks like it does not exist.
            auth=self.auth,
        )
        return r.status_code == 200


class BitbucketConfig(MlemConfigBase):
    """Bitbucket credentials, declared under the ``bitbucket`` section."""

    class Config:
        section = "bitbucket"

    # Both values default to None and are bound to the BITBUCKET_USERNAME /
    # BITBUCKET_PASSWORD environment variables through ``env``.
    USERNAME: Optional[str] = Field(None, env="BITBUCKET_USERNAME")
    PASSWORD: Optional[str] = Field(None, env="BITBUCKET_PASSWORD")


class BitBucketFileSystem(
    AbstractFileSystem
):  # pylint: disable=abstract-method
    """Read-only fsspec filesystem over one revision of a Bitbucket repo.

    Args:
        repo: repository identifier used in the API endpoints.
        sha: revision to browse; when omitted the repository's main branch
            is looked up and used.
        host: base URL of the Bitbucket host.
        username: basic-auth user; falls back to ``BitbucketConfig``.
        password: basic-auth password; falls back to ``BitbucketConfig``.
        **kwargs: forwarded to ``AbstractFileSystem.__init__``.
    """

    # The inherited ``_strip_protocol`` only removes the prefix named here;
    # without it ``bitbucket://`` would stay glued to the repo name parsed
    # in ``_get_kwargs_from_urls``.
    protocol = "bitbucket"

    def __init__(
        self,
        repo: str,
        sha: Optional[str] = None,
        host: str = BITBUCKET_ORG,
        username: Optional[str] = None,
        password: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)
        conf = BitbucketConfig.local()
        self.password = password or conf.PASSWORD
        self.username = username or conf.USERNAME
        self.repo = repo
        self.host = host

        self.bb = BitbucketWrapper(host, self.username, self.password)
        if sha is None:
            sha = self.bb.get_default_branch(repo)
        self.root = sha
        # List the root once up front: any error from that request surfaces
        # from the constructor, and the listing ends up in ``dircache``.
        self.ls("")

    def invalidate_cache(self, path=None):
        """Drop every cached directory listing, whatever ``path`` is."""
        super().invalidate_cache(path)
        self.dircache.clear()

    def ls(self, path, detail=False, sha=None, **kwargs):
        """List the entries of directory ``path``.

        Args:
            path: directory path, optionally in URL form.
            detail: return the entry dicts instead of sorted names.
            sha: revision to list; defaults to the filesystem's revision.
                Only listings of the default revision are cached.

        Raises:
            FileNotFoundError: if Bitbucket answers 404.
            requests.HTTPError: for any other error status.
        """
        path = self._strip_protocol(path)
        if path not in self.dircache or sha not in [self.root, None]:
            try:
                r = self.bb.tree(
                    path=path, repo=self.repo, rev=sha or self.root
                )
            except HTTPError as e:
                if e.response.status_code == 404:
                    raise FileNotFoundError() from e
                raise
            out = [
                {
                    "name": posixpath.join(path, f["name"]),
                    "mode": None,
                    "type": f["type"],
                    "size": f.get("size", 0),
                    "sha": sha,
                }
                for f in r
            ]
            if sha in [self.root, None]:
                self.dircache[path] = out
        else:
            out = self.dircache[path]

        if detail:
            return out
        return sorted([f["name"] for f in out])

    @classmethod
    def _strip_protocol(cls, path):
        """Return the in-repo part of ``path``.

        Paths containing ``@`` are parsed as ``repo@ref/path`` URLs; all
        others go through the inherited implementation.
        """
        if "@" in path:
            return cls._get_kwargs_from_urls(path)["path"]
        return super()._strip_protocol(path)

    @classmethod
    def _get_kwargs_from_urls(cls, path):
        """Parse ``bitbucket://<repo>@<ref>/<path>`` into constructor kwargs.

        Anything with a different scheme is returned untouched as
        ``{"path": path}``.
        """
        parsed_path = urlsplit(path)
        protocol = parsed_path.scheme
        if protocol != "bitbucket":
            return {"path": path}
        # Split on the first "@" only: with a larger maxsplit a second "@"
        # (e.g. in a file name) yields three parts and breaks the unpacking.
        repo, path = super()._strip_protocol(path).split("@", maxsplit=1)
        sha, path = _mathch_path_with_ref(repo, path)
        return {
            "path": path,
            "sha": sha,
            "protocol": protocol,
            "repo": repo,
        }

    def _open(
        self,
        path,
        mode="rb",
        block_size=None,
        autocommit=True,
        cache_options=None,
        sha=None,
        **kwargs,
    ):
        """Download ``path`` and return it wrapped in a ``MemoryFile``.

        Raises:
            NotImplementedError: for any mode other than ``"rb"``.
        """
        if mode != "rb":
            raise NotImplementedError
        return MemoryFile(
            None,
            None,
            self.bb.open(path, self.repo, rev=sha or self.root),
        )


# Register the filesystem in fsspec's ``known_implementations`` mapping under
# the "bitbucket" key, identified by its dotted "<module>.<class>" path.
known_implementations["bitbucket"] = {
    "class": ".".join(
        (BitBucketFileSystem.__module__, BitBucketFileSystem.__name__)
    )
}


def ls_bb_refs(repo):
    """Return the ref names of ``repo`` on ``BITBUCKET_ORG``.

    Credentials are taken from ``BitbucketConfig.local()``.
    """
    settings = BitbucketConfig.local()
    client = BitbucketWrapper(
        BITBUCKET_ORG,
        username=settings.USERNAME,
        password=settings.PASSWORD,
    )
    return client.get_refs(repo)


def _mathch_path_with_ref(repo, path):
    """Split ``path`` into a leading ref name and the remaining file path.

    Ref names may themselves contain slashes, so the ref cannot be found by
    cutting at the first ``/``. Every leading run of segments is tried,
    shortest first, against the URL-quoted names from ``ls_bb_refs``.

    Args:
        repo: repository whose refs are fetched with ``ls_bb_refs``.
        path: ``"<ref>/<path inside the repo>"``; the second part may be
            missing.

    Returns:
        ``(sha, rest)``: the matched ref with its slashes written as
        ``%2F``, and the remaining path (``""`` if nothing follows the ref).

    Raises:
        ValueError: if no leading part of ``path`` names a known ref.
    """
    parts = path.split("/")
    branches = {quote_plus(ref) for ref in ls_bb_refs(repo)}
    for end in range(1, len(parts) + 1):
        sha = "%2F".join(parts[:end])
        if sha in branches:
            rest = parts[end:]
            # posixpath.join needs at least one argument, so a path that
            # consists of the ref alone must be handled separately.
            return sha, (posixpath.join(*rest) if rest else "")
    raise ValueError(f'Could not resolve branch from path "{path}"')


class BitBucketResolver(CloudGitResolver):
    """Resolver for ``https://bitbucket.org`` URLs and ``bitbucket://`` URIs."""

    type: ClassVar = "bitbucket"
    FS = BitBucketFileSystem
    PROTOCOL = "bitbucket"

    # TODO: support self-hosted bitbucket (other hosts)
    PREFIXES = [BITBUCKET_ORG, PROTOCOL + "://"]
    versioning_support = True

    @classmethod
    def get_kwargs(cls, uri):
        """Extract ``repo``, ``sha`` and ``path`` from a repository URL.

        The URL path is expected to look like ``<repo>/src/<ref>/<path>``.
        Without a ``/src/`` part only ``repo`` and an empty ``path`` are
        returned.

        Raises:
            ValueError: if the part after ``/src/`` starts with no known ref.
        """
        sha: Optional[str]
        # Cut at the first "/src/" only, so that a "src" directory inside
        # the repository stays part of the file path.
        repo, separator, ref_and_path = (
            urlparse(uri).path.strip("/").partition("/src/")
        )
        if not separator:
            return {"repo": repo, "path": ""}
        sha, path = _mathch_path_with_ref(repo, ref_and_path)
        return {"repo": repo, "sha": sha, "path": path}

    @classmethod
    def check_rev(cls, options):
        """Return whether ``options["sha"]`` exists in ``options["repo"]``.

        Credentials are taken from ``BitbucketConfig.local()``.
        """
        conf = BitbucketConfig.local()
        password = conf.PASSWORD
        username = conf.USERNAME
        return BitbucketWrapper(
            BITBUCKET_ORG, username=username, password=password
        ).check_rev(options["repo"], options["sha"])

    @classmethod
    def get_uri(
        cls,
        path: str,
        project: Optional[str],
        rev: Optional[str],
        fs: BitBucketFileSystem,
    ):
        """Build the web URL of ``path`` inside ``project`` at ``fs.root``."""
        fullpath = posixpath.join(project or "", path)
        return f"{BITBUCKET_ORG}/{fs.repo}/src/{fs.root}/{fullpath}"

    @classmethod
    def get_project_uri(  # pylint: disable=unused-argument
        cls,
        path: str,
        project: Optional[str],
        rev: Optional[str],
        fs: BitBucketFileSystem,
        uri: str,
    ):
        """Build the web URL of ``project`` itself at ``fs.root``."""
        return f"{BITBUCKET_ORG}/{fs.repo}/src/{fs.root}/{project or ''}"
Loading

0 comments on commit 482855a

Please sign in to comment.