Skip to content

Commit

Permalink
Add validation
Browse files Browse the repository at this point in the history
Added platform check - the way bundler works with platforms has changed
over the years. See how it
[works](https://prathamesh.tech/2021/04/18/bundler-2-2-3-and-deployment-of-ruby-apps/)
[now](https://www.moncefbelyamani.com/understanding-the-gemfile-lock-file/#platforms)
x how [it used to
work](rubygems/rubygems#4269 (comment)).
TLDR: One Gemfile.lock can contain dependencies for multiple platforms
and the way they will be resolved at deploy/install time changed with
bundler version 2.2.0 and 2.2.3.

I decided to implement it the simplest way - support only Gemfile.lock
with only one platform specified - ruby. This way, we will be able to
support all bundler versions and it will be simpler to parse the
resulting file (scancode-toolkit isn't able to parse files with multiple
platform versions of one package).

Added check for Git reference (40 character hash). For GEM dependencies,
semantic versioning is recommended, [but not
enforced](https://guides.rubygems.org/patterns/), therefore the only
thing that's validated is if the version isn't None (similarly for
PATH).

Signed-off-by: Milan Tichavský <[email protected]>
  • Loading branch information
mtichavsky committed Jun 7, 2022
1 parent a3d3648 commit 26ee64c
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 8 deletions.
34 changes: 30 additions & 4 deletions cachito/workers/pkg_managers/ruby.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import re
from dataclasses import dataclass
from pathlib import Path

import packagedcode.gemfile_lock as gemlock_parser

from cachito.errors import ValidationError

GIT_REF_FORMAT = re.compile(r"^[a-fA-F0-9]{40}$")
PLATFORMS_RUBY = re.compile(r"^PLATFORMS\n {2}ruby\n\n", re.MULTILINE)


@dataclass
class GemMetadata:
Expand All @@ -22,19 +26,35 @@ def parse_gemlock(gemlock_path: Path):
:param Path gemlock_path: the full path to Gemfile.lock
:return: list of Gems
"""
all_gems = gemlock_parser.GemfileLockParser(str(gemlock_path)).all_gems
dependencies = []
if not gemlock_path.is_file():
return []

_validate_gemlock_platforms(gemlock_path)

dependencies = []
all_gems = gemlock_parser.GemfileLockParser(str(gemlock_path)).all_gems
for gem in all_gems.values():
validate_gem_metadata(gem, gemlock_path.parent)
_validate_gem_metadata(gem, gemlock_path.parent)
dependencies.append(GemMetadata(gem.name, gem.version, gem.type, gem.remote))

return dependencies


def validate_gem_metadata(gem, gemlock_dir):
def _validate_gemlock_platforms(gemlock_path):
"""Make sure Gemfile.lock contains only one platform - ruby."""
with open(gemlock_path) as f:
contents = f.read()

if not PLATFORMS_RUBY.search(contents):
msg = "PLATFORMS section of Gemfile.lock has to contain one and only platform - ruby."
raise ValidationError(msg)


def _validate_gem_metadata(gem, gemlock_dir):
"""Validate parsed Gem.
While individual gems may contain platform information, this function doesn't check it,
because it expects the Gemfile.lock to be ruby platform specific.
:param Gem gem: gem with information parsed from Gemfile.lock
:param Path gemlock_dir: the root directory containing Gemfile.lock
:raise: ValidationError
Expand All @@ -50,6 +70,12 @@ def validate_gem_metadata(gem, gemlock_dir):
elif gem.type == "GIT":
if not gem.remote.startswith("https://"):
raise ValidationError("All Ruby GIT dependencies have to use HTTPS protocol.")
if not GIT_REF_FORMAT.match(gem.version):
msg = (
f"No git ref for gem: {gem.name} (expected 40 hexadecimal characters, "
f"got: {gem.version})."
)
raise ValidationError(msg)
elif gem.type == "PATH":
dependency_dir = gemlock_dir / gem.remote
if not dependency_dir.exists():
Expand Down
79 changes: 75 additions & 4 deletions tests/test_workers/test_pkg_managers/test_ruby.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class TestGemlockParsing:
# GEM dependency
(
dedent(
"""\
"""
GEM
remote: https://rubygems.org/
specs:
Expand Down Expand Up @@ -47,6 +47,9 @@ class TestGemlockParsing:
ci_reporter (2.0.0)
builder (>= 2.1.2)
PLATFORMS
ruby
DEPENDENCIES
ci_reporter_shell!
"""
Expand Down Expand Up @@ -108,6 +111,20 @@ def test_parsing_of_valid_path_dependency(self, tmpdir):
@pytest.mark.parametrize(
"file_contents, expected_error",
(
(
dedent(
"""
GEM
remote: https://rubygems.org/
specs:
zeitwerk (2.5.4)
DEPENDENCIES
zeitwerk
"""
),
"PLATFORMS section of Gemfile.lock has to contain one and only platform - ruby.",
),
(
dedent(
"""
Expand All @@ -116,6 +133,9 @@ def test_parsing_of_valid_path_dependency(self, tmpdir):
specs:
zeitwerk (2.5.4)
PLATFORMS
ruby
DEPENDENCIES
zeitwerk
"""
Expand All @@ -133,11 +153,33 @@ def test_parsing_of_valid_path_dependency(self, tmpdir):
json-schema (3.0.0)
addressable (>= 2.4)
PLATFORMS
ruby
DEPENDENCIES
json-schema!
"""
),
"Ruby GIT dependencies have to use HTTPS protocol.",
"All Ruby GIT dependencies have to use HTTPS protocol.",
),
(
dedent(
"""
GIT
remote: https://github.com/3scale/json-schema.git
revision: xxx
specs:
json-schema (3.0.0)
addressable (>= 2.4)
PLATFORMS
ruby
DEPENDENCIES
json-schema!
"""
),
"No git ref for gem: json-schema (expected 40 hexadecimal characters, got: xxx).",
),
(
dedent(
Expand All @@ -148,6 +190,9 @@ def test_parsing_of_valid_path_dependency(self, tmpdir):
active-docs (1.0.0)
railties (> 3.1)
PLATFORMS
ruby
DEPENDENCIES
active-docs!
"""
Expand All @@ -163,6 +208,9 @@ def test_parsing_of_valid_path_dependency(self, tmpdir):
active-docs (1.0.0)
railties (> 3.1)
PLATFORMS
ruby
DEPENDENCIES
active-docs!
"""
Expand All @@ -177,11 +225,32 @@ def test_parsing_of_valid_path_dependency(self, tmpdir):
specs:
zeitwerk
PLATFORMS
ruby
DEPENDENCIES
zeitwerk
"""
),
"Unspecified name or version of a RubyGem.",
),
(
dedent(
"""
GEM
remote: https://rubygems.org/
specs:
zeitwerk (2.5.4)
PLATFORMS
ruby
x86_64-linux
DEPENDENCIES
zeitwerk
"""
),
"Unspecified name or version",
"PLATFORMS section of Gemfile.lock has to contain one and only platform - ruby.",
),
),
)
Expand All @@ -190,5 +259,7 @@ def test_parsing_of_invalid_cases(self, file_contents, expected_error, tmpdir):
gemfile_lock = tmpdir.join("Gemfile.lock")
gemfile_lock.write(file_contents)

with pytest.raises(ValidationError, match=expected_error):
with pytest.raises(ValidationError) as exc_info:
parse_gemlock(Path(gemfile_lock))

assert str(exc_info.value) == expected_error

0 comments on commit 26ee64c

Please sign in to comment.