Skip to content

Commit

Permalink
yarn-v1: Use real names for packages
Browse files Browse the repository at this point in the history
To make the SBOM even more accurate, we should use the name that comes
from the package respective package.json file either from the cached
tarball or a directory. User could potentially, call these non-registry
dependencies whatever they want and the name in the package.json could
be completely differnet.

Similar behavior is already implemented for yarn berry [1].

---

[1]: https://github.com/containerbuildsystem/cachi2/blob/main/cachi2/core/package_managers/yarn/resolver.py#L356

Signed-off-by: Michal Šoltis <[email protected]>
  • Loading branch information
slimreaper35 committed Dec 12, 2024
1 parent 7ef41a0 commit 7615f82
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 20 deletions.
2 changes: 1 addition & 1 deletion cachi2/core/package_managers/yarn_classic/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def _resolve_yarn_project(project: Project, output_dir: RootedPath) -> None:
prefetch_env = _get_prefetch_environment_variables(output_dir)
_verify_corepack_yarn_version(project.source_dir, prefetch_env)
_fetch_dependencies(project.source_dir, prefetch_env)
packages = resolve_packages(project)
packages = resolve_packages(project, output_dir.join_within_root(MIRROR_DIR))
_verify_no_offline_mirror_collisions(packages)


Expand Down
82 changes: 71 additions & 11 deletions cachi2/core/package_managers/yarn_classic/resolver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import json
import re
import tarfile
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Optional, Union
from typing import Any, Iterable, Optional, Union
from urllib.parse import urlparse

from packageurl import PackageURL
Expand All @@ -12,7 +14,11 @@
from cachi2.core.errors import PackageRejected, UnexpectedFormat
from cachi2.core.package_managers.npm import NPM_REGISTRY_CNAMES
from cachi2.core.package_managers.yarn_classic.project import PackageJson, Project, YarnLock
from cachi2.core.package_managers.yarn_classic.utils import find_runtime_deps
from cachi2.core.package_managers.yarn_classic.utils import (
find_runtime_deps,
get_git_tarball_mirror_name,
get_tarball_mirror_name,
)
from cachi2.core.package_managers.yarn_classic.workspaces import (
Workspace,
extract_workspace_metadata,
Expand Down Expand Up @@ -173,8 +179,11 @@ def purl(self) -> str:


class _YarnClassicPackageFactory:
def __init__(self, source_dir: RootedPath, runtime_deps: set[str]) -> None:
def __init__(
self, source_dir: RootedPath, mirror_dir: RootedPath, runtime_deps: set[str]
) -> None:
self._source_dir = source_dir
self._mirror_dir = mirror_dir
self._runtime_deps = runtime_deps

def create_package_from_pyarn_package(self, package: PYarnPackage) -> YarnClassicPackage:
Expand All @@ -192,6 +201,7 @@ def assert_package_has_relative_path(package: PYarnPackage) -> None:
dev = package_id not in self._runtime_deps

if _is_from_npm_registry(package.url):
# registry packages should already have the correct name
return RegistryPackage(
name=package.name,
version=package.version,
Expand All @@ -206,31 +216,49 @@ def assert_package_has_relative_path(package: PYarnPackage) -> None:
path = self._source_dir.join_within_root(package.path)
# File packages have a url, whereas link packages do not
if package.url:
real_name = _read_name_from_tarball(path)

return FilePackage(
name=package.name,
name=real_name,
version=package.version,
root=self._source_dir,
relpath=path.subpath_from_root,
integrity=package.checksum,
dev=dev,
)

package_json_path = path.join_within_root("package.json")
# package.json is not required for link packages
if package_json_path.path.exists():
real_name = _read_name_from_package_json(package_json_path)
else:
real_name = package.name

return LinkPackage(
name=package.name,
name=real_name,
version=package.version,
root=self._source_dir,
relpath=path.subpath_from_root,
dev=dev,
)
elif _is_git_url(package.url):
tarball_name = get_git_tarball_mirror_name(package.url)
tarball_path = self._mirror_dir.join_within_root(tarball_name)
real_name = _read_name_from_tarball(tarball_path)

return GitPackage(
name=package.name,
name=real_name,
version=package.version,
url=package.url,
dev=dev,
)
elif _is_tarball_url(package.url):
tarball_name = get_tarball_mirror_name(package.url)
tarball_path = self._mirror_dir.join_within_root(tarball_name)
real_name = _read_name_from_tarball(tarball_path)

return UrlPackage(
name=package.name,
name=real_name,
version=package.version,
url=package.url,
integrity=package.checksum,
Expand Down Expand Up @@ -292,11 +320,11 @@ def _is_from_npm_registry(url: str) -> bool:


def _get_packages_from_lockfile(
source_dir: RootedPath, yarn_lock: YarnLock, runtime_deps: set[str]
source_dir: RootedPath, mirror_dir: RootedPath, yarn_lock: YarnLock, runtime_deps: set[str]
) -> list[YarnClassicPackage]:
"""Return a list of Packages for all dependencies in yarn.lock."""
pyarn_packages: list[PYarnPackage] = yarn_lock.yarn_lockfile.packages()
package_factory = _YarnClassicPackageFactory(source_dir, runtime_deps)
package_factory = _YarnClassicPackageFactory(source_dir, mirror_dir, runtime_deps)

return [
package_factory.create_package_from_pyarn_package(package) for package in pyarn_packages
Expand Down Expand Up @@ -333,7 +361,7 @@ def _get_workspace_packages(
]


def resolve_packages(project: Project) -> list[YarnClassicPackage]:
def resolve_packages(project: Project, mirror_dir: RootedPath) -> Iterable[YarnClassicPackage]:
"""Return a list of Packages corresponding to all project dependencies."""
workspaces = extract_workspace_metadata(project.source_dir)
yarn_lock = YarnLock.from_file(project.source_dir.join_within_root("yarn.lock"))
Expand All @@ -343,5 +371,37 @@ def resolve_packages(project: Project) -> list[YarnClassicPackage]:

result.append(_get_main_package(project.source_dir, project.package_json))
result.extend(_get_workspace_packages(project.source_dir, workspaces))
result.extend(_get_packages_from_lockfile(project.source_dir, yarn_lock, runtime_deps))
result.extend(
_get_packages_from_lockfile(project.source_dir, mirror_dir, yarn_lock, runtime_deps)
)
return result


def _read_name_from_tarball(tarball_path: RootedPath) -> str:
"""Read the package name from the package.json file in the cached tarball."""
with tarfile.open(tarball_path) as tar:
names = tar.getnames()
package_json_subpath = next((n for n in names if n.endswith("package.json")), None)
if package_json_subpath is None:
raise ValueError(f"No package.json found in the tarball {tarball_path}")

package_json = tar.extractfile(package_json_subpath)
if package_json is None:
raise ValueError(f"Failed to extract package.json from {tarball_path}")

package_json_content: dict[str, Any] = json.loads(package_json.read())

if (name := package_json_content.get("name")) is None:
raise ValueError(f"No 'name' field found in package.json in {tarball_path}")

return name


def _read_name_from_package_json(path: RootedPath) -> str:
"""Read the package name from a package.json file."""
package_json = PackageJson.from_file(path)

if (name := package_json.data.get("name")) is None:
raise ValueError(f"No 'name' field found in package.json in {path}")

return name
3 changes: 2 additions & 1 deletion tests/unit/package_managers/yarn_classic/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from cachi2.core.models.output import BuildConfig, EnvironmentVariable, RequestOutput
from cachi2.core.models.sbom import Component
from cachi2.core.package_managers.yarn_classic.main import (
MIRROR_DIR,
_fetch_dependencies,
_generate_build_environment_variables,
_get_prefetch_environment_variables,
Expand Down Expand Up @@ -122,7 +123,7 @@ def test_resolve_yarn_project(
mock_fetch_dependencies.assert_called_once_with(
project.source_dir, mock_prefetch_env_vars.return_value
)
mock_resolve_packages.assert_called_once_with(project)
mock_resolve_packages.assert_called_once_with(project, output_dir.join_within_root(MIRROR_DIR))


@mock.patch("cachi2.core.package_managers.yarn_classic.main.run_yarn_cmd")
Expand Down
60 changes: 53 additions & 7 deletions tests/unit/package_managers/yarn_classic/test_resolver.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import io
import json
import re
import tarfile
from pathlib import Path
from unittest import mock
from urllib.parse import quote
Expand All @@ -9,6 +12,7 @@

from cachi2.core.checksum import ChecksumInfo
from cachi2.core.errors import PackageRejected, UnexpectedFormat
from cachi2.core.package_managers.yarn_classic.main import MIRROR_DIR
from cachi2.core.package_managers.yarn_classic.project import PackageJson
from cachi2.core.package_managers.yarn_classic.resolver import (
FilePackage,
Expand All @@ -24,6 +28,7 @@
_is_from_npm_registry,
_is_git_url,
_is_tarball_url,
_read_name_from_tarball,
_YarnClassicPackageFactory,
resolve_packages,
)
Expand Down Expand Up @@ -107,7 +112,11 @@ def test__is_from_npm_registry_can_parse_incorrect_registry_urls() -> None:
assert not _is_from_npm_registry("https://example.org/fecha.tar.gz")


def test_create_package_from_pyarn_package(rooted_tmp_path: RootedPath) -> None:
@mock.patch("cachi2.core.package_managers.yarn_classic.resolver._read_name_from_tarball")
def test_create_package_from_pyarn_package(
mock_read_name_from_tarball: mock.Mock,
rooted_tmp_path: RootedPath,
) -> None:
test_cases: list[tuple[PYarnPackage, YarnClassicPackage]] = [
(
PYarnPackage(
Expand Down Expand Up @@ -182,13 +191,14 @@ def test_create_package_from_pyarn_package(rooted_tmp_path: RootedPath) -> None:
]

for pyarn_package, expected_package in test_cases:
mock_read_name_from_tarball.return_value = expected_package.name
runtime_deps = (
set()
if expected_package.dev
else set({f"{pyarn_package.name}@{pyarn_package.version}"})
)

package_factory = _YarnClassicPackageFactory(rooted_tmp_path, runtime_deps)
package_factory = _YarnClassicPackageFactory(rooted_tmp_path, rooted_tmp_path, runtime_deps)
assert package_factory.create_package_from_pyarn_package(pyarn_package) == expected_package


Expand All @@ -203,7 +213,7 @@ def test_create_package_from_pyarn_package_fail_absolute_path(rooted_tmp_path: R
f"({pyarn_package.path}), which is not permitted."
)

package_factory = _YarnClassicPackageFactory(rooted_tmp_path, set())
package_factory = _YarnClassicPackageFactory(rooted_tmp_path, rooted_tmp_path, set())
with pytest.raises(PackageRejected, match=re.escape(error_msg)):
package_factory.create_package_from_pyarn_package(pyarn_package)

Expand All @@ -217,7 +227,7 @@ def test_create_package_from_pyarn_package_fail_path_outside_root(
path="../path/outside/root",
)

package_factory = _YarnClassicPackageFactory(rooted_tmp_path, set())
package_factory = _YarnClassicPackageFactory(rooted_tmp_path, rooted_tmp_path, set())
with pytest.raises(PathOutsideRoot):
package_factory.create_package_from_pyarn_package(pyarn_package)

Expand All @@ -231,7 +241,7 @@ def test_create_package_from_pyarn_package_fail_unexpected_format(
url="ftp://some-tarball.tgz",
)

package_factory = _YarnClassicPackageFactory(rooted_tmp_path, set())
package_factory = _YarnClassicPackageFactory(rooted_tmp_path, rooted_tmp_path, set())
with pytest.raises(UnexpectedFormat):
package_factory.create_package_from_pyarn_package(pyarn_package)

Expand All @@ -258,7 +268,7 @@ def test__get_packages_from_lockfile(
mock.call(mock_pyarn_package_2),
]

output = _get_packages_from_lockfile(rooted_tmp_path, mock_yarn_lock, set())
output = _get_packages_from_lockfile(rooted_tmp_path, rooted_tmp_path, mock_yarn_lock, set())

mock_pyarn_lockfile.packages.assert_called_once()
mock_create_package.assert_has_calls(create_package_expected_calls)
Expand Down Expand Up @@ -293,7 +303,7 @@ def test_resolve_packages(
mock_get_lockfile_packages.return_value = lockfile_packages
mock_get_workspace_packages.return_value = workspace_packages

output = resolve_packages(project)
output = resolve_packages(project, rooted_tmp_path.join_within_root(MIRROR_DIR))
mock_extract_workspaces.assert_called_once_with(rooted_tmp_path)
mock_get_yarn_lock.assert_called_once_with(yarn_lock_path)
mock_get_main_package.assert_called_once_with(project.source_dir, project.package_json)
Expand All @@ -302,6 +312,7 @@ def test_resolve_packages(
)
mock_get_lockfile_packages.assert_called_once_with(
rooted_tmp_path,
rooted_tmp_path.join_within_root(MIRROR_DIR),
mock_get_yarn_lock.return_value,
find_runtime_deps.return_value,
)
Expand Down Expand Up @@ -440,3 +451,38 @@ def test_package_purl(rooted_tmp_path_repo: RootedPath) -> None:

for package, expected_purl in yarn_classic_packages:
assert package.purl == expected_purl


def mock_tarball(path: RootedPath, package_json_content: dict[str, str]) -> RootedPath:
tarball_path = path.join_within_root("package.tar.gz")

if not package_json_content:
with tarfile.open(tarball_path, mode="w:gz") as tar:
tar.addfile(tarfile.TarInfo(name="foo"), io.BytesIO())

return tarball_path

with tarfile.open(tarball_path, mode="w:gz") as tar:
package_json_bytes = json.dumps(package_json_content).encode("utf-8")
info = tarfile.TarInfo(name="package.json")
info.size = len(package_json_bytes)
tar.addfile(info, io.BytesIO(package_json_bytes))

return tarball_path


def test_successful_name_extraction(rooted_tmp_path: RootedPath) -> None:
tarball_path = mock_tarball(path=rooted_tmp_path, package_json_content={"name": "foo"})
assert _read_name_from_tarball(tarball_path) == "foo"


def test_no_package_json(rooted_tmp_path: RootedPath) -> None:
tarball_path = mock_tarball(path=rooted_tmp_path, package_json_content={})
with pytest.raises(ValueError, match="No package.json found"):
_read_name_from_tarball(tarball_path)


def test_missing_name_field(rooted_tmp_path: RootedPath) -> None:
tarball_path = mock_tarball(path=rooted_tmp_path, package_json_content={"key": "foo"})
with pytest.raises(ValueError, match="No 'name' field found"):
_read_name_from_tarball(tarball_path)

0 comments on commit 7615f82

Please sign in to comment.