Skip to content

Commit

Permalink
cachi2 rubygems / bundler design document
Browse files Browse the repository at this point in the history
Signed-off-by: Michal Šoltis <[email protected]>
  • Loading branch information
slimreaper35 committed Jul 10, 2024
1 parent 348e454 commit bafa59e
Show file tree
Hide file tree
Showing 7 changed files with 793 additions and 114 deletions.
22 changes: 20 additions & 2 deletions cachi2/core/models/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def show_error(error: "ErrorDict") -> str:


# Supported package managers
PackageManagerType = Literal["gomod", "npm", "pip", "rpm", "yarn"]
PackageManagerType = Literal["gomod", "npm", "pip", "rpm", "rubygems", "yarn"]

Flag = Literal[
"cgo-disable", "dev-package-managers", "force-gomod-tidy", "gomod-vendor", "gomod-vendor-check"
Expand Down Expand Up @@ -179,8 +179,21 @@ class YarnPackageInput(_PackageInputBase):
type: Literal["yarn"]


class RubygemsPackageInput(_PackageInputBase):
    """Accepted input for a Rubygems package.

    The ``type`` field only accepts the literal ``"rubygems"``; everything
    else is inherited from ``_PackageInputBase``.
    """

    # Fixed tag identifying this input model as a Rubygems package.
    type: Literal["rubygems"]


PackageInput = Annotated[
Union[GomodPackageInput, NpmPackageInput, PipPackageInput, RpmPackageInput, YarnPackageInput],
Union[
GomodPackageInput,
NpmPackageInput,
PipPackageInput,
RpmPackageInput,
RubygemsPackageInput,
YarnPackageInput,
],
# https://pydantic-docs.helpmanual.io/usage/types/#discriminated-unions-aka-tagged-unions
pydantic.Field(discriminator="type"),
]
Expand Down Expand Up @@ -246,6 +259,11 @@ def rpm_packages(self) -> list[RpmPackageInput]:
"""Get the rpm packages specified for this request."""
return self._packages_by_type(RpmPackageInput)

@property
def rubygems_packages(self) -> list[RubygemsPackageInput]:
    """Get the Rubygems packages specified for this request.

    :return: whatever ``_packages_by_type`` yields for ``RubygemsPackageInput``
    """
    rubygems_only = self._packages_by_type(RubygemsPackageInput)
    return rubygems_only

@property
def yarn_packages(self) -> list[YarnPackageInput]:
"""Get the yarn packages specified for this request."""
Expand Down
264 changes: 264 additions & 0 deletions cachi2/core/package_managers/rubygems.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
import logging
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Optional

from gemlock_parser.gemfile_lock import Gem, GemfileLockParser # type: ignore
from packageurl import PackageURL

from cachi2.core.errors import FetchError
from cachi2.core.models.input import Request, RubygemsPackageInput
from cachi2.core.models.output import Component, EnvironmentVariable, ProjectFile, RequestOutput
from cachi2.core.package_managers.general import download_binary_file, extract_git_info
from cachi2.core.rooted_path import RootedPath
from cachi2.core.scm import clone_as_tarball

# Name of the Bundler lockfile; it is looked up directly inside the package root.
GEMFILE_LOCK = "Gemfile.lock"

# A full git commit hash: exactly 40 hexadecimal characters.
# GIT gems must carry such a ref as their version.
GIT_REF_FORMAT = re.compile(r"^[a-fA-F0-9]{40}$")
# Matches a PLATFORMS section whose first entry is "ruby" and which is
# immediately followed by a blank line, i.e. "ruby" is the only platform listed.
PLATFORMS_RUBY = re.compile(r"^PLATFORMS\n {2}ruby\n\n", re.MULTILINE)

# Module-level logger.
log = logging.getLogger(__name__)


def fetch_rubygems_source(request: Request) -> RequestOutput:
    """Resolve and fetch RubyGems dependencies.

    Creates ``deps/rubygems`` inside the request output directory, resolves
    every Rubygems package of the request and reports one component for the
    package itself plus one component per resolved dependency.

    :param request: the request whose ``rubygems_packages`` are processed
    :return: the collected components together with the Bundler environment
        variables; the list of project files is always empty
    """
    bundler_env = [
        EnvironmentVariable(name="BUNDLE_CACHE_ALL", value="true"),
        EnvironmentVariable(name="BUNDLE_CACHE_PATH", value="${output_dir}/deps/rubygems"),
        EnvironmentVariable(name="BUNDLE_FORCE_RUBY_PLATFORM", value="true"),
    ]
    # Nothing in the project is rewritten, so there are no project files to report.
    no_project_files: list[ProjectFile] = []

    deps_dir = request.output_dir.join_within_root("deps", "rubygems")
    deps_dir.path.mkdir(parents=True, exist_ok=True)

    components = []
    for package in request.rubygems_packages:
        resolved = _resolve_rubygems(request.source_dir, deps_dir, package)
        # The main package comes first, followed by its dependencies.
        components.append(Component.from_package_dict(resolved["package"]))
        components.extend(
            Component(name=dep["name"], version=dep["version"], purl=dep["purl"])
            for dep in resolved["dependencies"]
        )

    return RequestOutput.from_obj_list(
        components,
        environment_variables=bundler_env,
        project_files=no_project_files,
    )


def _resolve_rubygems(
    source_dir: RootedPath,
    output_dir: RootedPath,
    package: RubygemsPackageInput,
) -> dict[str, Any]:
    """Resolve one Rubygems package and download its dependencies.

    :param source_dir: directory the package path is joined onto
    :param output_dir: directory handed to the dependency downloader
    :param package: the package input; only its ``path`` is read here
    :return: a dict with a ``"package"`` entry describing the main package and
        a ``"dependencies"`` entry holding the downloader's result
    """
    name, version = _get_metadata()
    main_purl = PackageURL(type="rubygems", name=name, version=version)

    package_root = source_dir.join_within_root(package.path)
    lockfile = package_root.join_within_root(GEMFILE_LOCK)

    parsed_gems = _parse_gemlock(package_root, lockfile)
    downloaded = _download_dependencies(output_dir, parsed_gems, package_root, set())

    main_package = {
        "name": name,
        "version": version,
        "type": "rubygems",
        "path": package_root,
        "purl": main_purl.to_string(),
    }
    return {"package": main_package, "dependencies": downloaded}


def _get_metadata() -> tuple[str, str]:
    """Return the name and version of the main package.

    NOTE(review): the values are hard-coded, which looks like a placeholder
    until real metadata detection exists - confirm.

    :return: a ``(name, version)`` tuple
    """
    name = "foo"
    version = "0.1.0"
    return name, version


@dataclass
class GemMetadata:
    """Gem metadata.

    Plain record describing one gem entry taken from Gemfile.lock. The field
    order matters because instances are built positionally.
    """

    # Gem name.
    name: str
    # Gem version.
    version: str
    # Dependency kind; the rest of this module handles "GEM", "GIT" and "PATH".
    type: str
    # Where the gem comes from (a remote for GEM/GIT entries, a path for PATH entries).
    source: str
    # Optional branch, defaults to None.
    branch: Optional[str] = None


def _parse_gemlock(
    source_dir: RootedPath,
    gemlock_path: RootedPath,
) -> list[GemMetadata]:
    """Parse Gemfile.lock and return the metadata of every usable gem.

    The PLATFORMS section is validated first. Gems without a version are
    skipped; every other gem is validated and converted to ``GemMetadata``.

    :param source_dir: directory that PATH dependencies must stay inside
    :param gemlock_path: path to the Gemfile.lock file
    :return: metadata of all gems that have a version
    :raises FetchError: if the PLATFORMS section is not exactly ``ruby``
    :raises Exception: if a gem fails validation
    """
    _validate_gemlock_platforms(gemlock_path)

    parser = GemfileLockParser(str(gemlock_path))
    log.info("Bundled with version %s", parser.bundled_with)

    # PATH gems must be validated against the directory that contains
    # Gemfile.lock, because _get_path_package_info resolves the same paths
    # against the package root. Using the root of the rooted path instead
    # would check a different location for packages in a subdirectory.
    # NOTE(review): assumes RootedPath.path is a pathlib.Path - confirm.
    gemlock_dir = gemlock_path.path.parent

    dependencies = []
    for gem in parser.all_gems.values():
        if gem.version is None:
            log.debug(
                "Skipping RubyGem %s, because of a missing version. "
                "This means gem is not used in a platform for which Gemfile.lock was generated.",
                gem.name,
            )
            continue

        _validate_gem_metadata(gem, source_dir, gemlock_dir)
        # PATH gems have no remote; their source is the local path.
        source = gem.path if gem.type == "PATH" else gem.remote
        dependencies.append(GemMetadata(gem.name, gem.version, gem.type, source, gem.branch))

    return dependencies


def _validate_gemlock_platforms(gemlock_path: RootedPath) -> None:
    """Ensure the PLATFORMS section of Gemfile.lock lists only ``ruby``.

    :param gemlock_path: path to the Gemfile.lock file
    :raises FetchError: if the file content does not match ``PLATFORMS_RUBY``
    """
    # Read with an explicit encoding so the result does not depend on the
    # locale of the machine running the check.
    with open(gemlock_path, encoding="utf-8") as f:
        contents = f.read()

    if not PLATFORMS_RUBY.search(contents):
        msg = "PLATFORMS section of Gemfile.lock has to contain one and only platform - ruby."
        raise FetchError(msg)


def _validate_gem_metadata(gem: Gem, source_dir: RootedPath, gemlock_dir: Path) -> None:
    """Check that a gem parsed from Gemfile.lock is of a supported kind.

    * GEM entries must use ``https://rubygems.org/`` as their remote.
    * GIT entries must use an HTTPS remote and a 40-character hexadecimal ref
      as their version.
    * PATH entries are checked by ``_validate_path_dependency_dir``.

    :param gem: the parsed gem; ``type``, ``remote``, ``version``, ``name`` are read
    :param source_dir: directory that PATH dependencies must stay inside
    :param gemlock_dir: directory that PATH dependency paths are joined onto
    :raises Exception: if the gem violates one of the rules above or has an
        unknown type
    """
    # NOTE(review): the bare Exception type is kept for backward compatibility;
    # a project-specific error class may be more appropriate - confirm.
    if gem.type == "GEM":
        if gem.remote != "https://rubygems.org/":
            raise Exception(
                "Cachi2 supports only https://rubygems.org/ as a remote for Ruby GEM dependencies."
            )

    elif gem.type == "GIT":
        if not gem.remote.startswith("https://"):
            raise Exception("All Ruby GIT dependencies have to use HTTPS protocol.")
        # The version of a GIT gem has to be a full commit hash.
        if not GIT_REF_FORMAT.match(gem.version):
            msg = (
                f"No git ref for gem: {gem.name} (expected 40 hexadecimal characters, "
                f"got: {gem.version})."
            )
            raise Exception(msg)

    elif gem.type == "PATH":
        _validate_path_dependency_dir(gem, source_dir, gemlock_dir)

    else:
        raise Exception("Gemfile.lock contains unsupported dependency type.")


def _validate_path_dependency_dir(gem: Gem, source_dir: RootedPath, gemlock_dir: Path) -> None:
    """Check that a PATH dependency exists and lies inside ``source_dir``.

    :param gem: the PATH gem; ``path`` and ``name`` are read
    :param source_dir: directory the resolved dependency must be located under
    :param gemlock_dir: directory the gem path is joined onto
    :raises FileNotFoundError: if the dependency path does not exist
    :raises RuntimeError: if resolving the path hits an infinite loop
    :raises ValueError: if the resolved path is not below ``source_dir``
    """
    dependency_dir = gemlock_dir.joinpath(gem.path)
    try:
        # strict=True makes resolve() fail on missing paths instead of
        # silently returning a path that does not exist.
        dependency_dir = dependency_dir.resolve(strict=True)
        dependency_dir.relative_to(source_dir)
    except FileNotFoundError as err:
        # resolve() failed, so dependency_dir still holds the unresolved path.
        raise FileNotFoundError(
            f"PATH dependency {gem.name} references a non-existing path: {dependency_dir}."
        ) from err
    except RuntimeError as err:
        # NOTE(review): newer Python versions may report symlink loops as
        # OSError rather than RuntimeError - confirm against the supported versions.
        raise RuntimeError(
            f"Path of PATH dependency {gem.name} contains an infinite loop: {dependency_dir}."
        ) from err
    except ValueError as err:
        raise ValueError(f"{dependency_dir} is not a subpath of {source_dir}") from err


def _download_dependencies(
    output_dir: RootedPath,
    dependencies: list[GemMetadata],
    package_root: RootedPath,
    allowed_path_deps: set[str],
) -> list[dict[str, Any]]:
    """Fetch every dependency and describe the result.

    GEM and GIT dependencies are downloaded into ``output_dir``; PATH
    dependencies are only described relative to ``package_root``. Each
    returned dict is extended with ``kind``, ``type`` and ``purl`` keys.

    :param output_dir: directory GEM and GIT dependencies are downloaded to
    :param dependencies: gems to process
    :param package_root: directory PATH dependencies are resolved against
    :param allowed_path_deps: not used by this function
    :return: one info dict per dependency, in input order
    :raises RuntimeError: if a dependency has an unexpected type
    """
    # Both downloaders share the (gem, target directory) call shape.
    downloaders = {
        "GEM": _download_rubygems_package,
        "GIT": _download_git_package,
    }
    results = []

    for gem in dependencies:
        log.info("Downloading %s (%s)", gem.name, gem.version)

        kind = gem.type
        if kind == "PATH":
            # Nothing is downloaded for local dependencies.
            info = _get_path_package_info(gem, package_root)
        elif kind in downloaders:
            info = downloaders[kind](gem, output_dir)
            log.info(
                "Successfully downloaded gem %s (%s) to %s",
                gem.name,
                gem.version,
                info["path"],
            )
        else:
            # Should not happen
            raise RuntimeError(f"Unexpected dependency type: {kind!r}")

        info["kind"] = kind
        info["type"] = "rubygems"
        purl = PackageURL(type="rubygems", name=gem.name, version=gem.version)
        info["purl"] = purl.to_string()
        results.append(info)

    return results


def _download_rubygems_package(gem: GemMetadata, deps_dir: RootedPath) -> dict[str, Any]:
    """Download a ``.gem`` file from rubygems.org into ``deps_dir``.

    :param gem: the gem to download; ``name`` and ``version`` are read
    :param deps_dir: directory the ``<name>-<version>.gem`` file is written to
    :return: a dict with the gem ``name``, ``version`` and download ``path``
    """
    target = deps_dir.join_within_root(f"{gem.name}-{gem.version}.gem")
    download_binary_file(
        f"https://rubygems.org/gems/{gem.name}-{gem.version}.gem",
        target.path,
    )

    result: dict[str, Any] = {"name": gem.name, "version": gem.version}
    result["path"] = target
    return result


def _download_git_package(gem: GemMetadata, deps_dir: RootedPath) -> dict[str, Any]:
    """Clone a GIT dependency as a tarball below ``deps_dir``.

    The tarball is stored as ``source.tar.gz`` in a ``host/namespace/repo``
    directory built from the extracted git info.

    :param gem: the gem to fetch; ``source`` and ``version`` form the git reference
    :param deps_dir: base directory for the per-repository directory
    :return: a dict with ``name``, ``version``, ``path`` (the repository
        directory) merged with all keys of the extracted git info
    """
    git_info = extract_git_info(f"{gem.source}@{gem.version}")

    location = [git_info[key] for key in ("host", "namespace", "repo")]
    repo_dir = deps_dir.join_within_root(*location)
    repo_dir.path.mkdir(parents=True, exist_ok=True)

    url, ref = git_info["url"], git_info["ref"]
    tarball = repo_dir.join_within_root("source.tar.gz").path
    clone_as_tarball(url, ref, to_path=tarball)

    result: dict[str, Any] = {
        "name": gem.name,
        "version": gem.version,
        "path": repo_dir,
    }
    # Keys from git_info win over the ones above if they overlap.
    result.update(git_info)
    return result


def _get_path_package_info(dep: GemMetadata, package_root: RootedPath) -> dict[str, Any]:
    """Describe a PATH dependency without downloading anything.

    :param dep: the PATH gem; its ``source`` is joined onto ``package_root``
    :param package_root: directory the gem source is resolved against
    :return: a dict with the gem ``name``, ``version`` and its ``path`` taken
        from ``subpath_from_root`` of the joined location
    """
    location = package_root.join_within_root(dep.source)
    return dict(name=dep.name, version=dep.version, path=location.subpath_from_root)
3 changes: 2 additions & 1 deletion cachi2/core/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from cachi2.core.errors import UnsupportedFeature
from cachi2.core.models.input import PackageManagerType, Request
from cachi2.core.models.output import RequestOutput
from cachi2.core.package_managers import gomod, npm, pip, rpm, yarn
from cachi2.core.package_managers import gomod, npm, pip, rpm, rubygems, yarn
from cachi2.core.rooted_path import RootedPath
from cachi2.core.utils import copy_directory

Expand All @@ -17,6 +17,7 @@
"npm": npm.fetch_npm_source,
"pip": pip.fetch_pip_source,
"yarn": yarn.fetch_yarn_source,
"rubygems": rubygems.fetch_rubygems_source,
}

# This is where we put package managers currently under development in order to
Expand Down
Loading

0 comments on commit bafa59e

Please sign in to comment.