Skip to content

Commit

Permalink
Resolve dependencies from lockfiles (#1244)
Browse files Browse the repository at this point in the history
* Resolve dependencies from lockfiles #1237

Reference: #1237
Reference: #1066
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Address feedback and add improvements

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Improve dependency resolving from lockfiles #1237

Resolves dependency for cases where multiple requirements
are resolved by one package and all the version requirements
are joined for that package.

Reference: #1237
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Update scancode-toolkit and fix tests

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Bump scancode-toolkit to v32.2.0

Reference: https://github.com/nexB/scancode-toolkit/releases/tag/v32.2.0
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Regenerate test fixtures and expectations

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Improve dependency resolver for lockfiles

Handle various lockfile cases where:
* Same package/dependencies are present in different lockfiles
* Independent lockfiles without a manifest and root package
* Ecosystems which have only a single version of package in
  their environment
* Dependency graphs where a resolved package can have many
  parent packages.

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Address feedback and refactor code

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Fix bugs for resolving Python packages

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Add unit tests and refactor code

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

* Address comments and add CHANGELOG entries

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>

---------

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
  • Loading branch information
AyanSinhaMahapatra authored Jul 1, 2024
1 parent 53ce3b0 commit 08c54b1
Show file tree
Hide file tree
Showing 42 changed files with 2,913 additions and 661 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,24 @@ v34.6.4 (unreleased)
related work directories created more than a specified number of days ago.
https://github.com/nexB/scancode.io/issues/1289

- Update the ``inspect_packages`` pipeline to have an optional ``Static Resolver``
group to create resolved packages and dependency relationships from lockfiles
and manifests having pre-resolved dependencies. Also update this pipeline to
perform package assembly from multiple manifests and files to create
discovered packages. Also update the ``resolve_dependencies`` pipeline to have
the same ``Static Resolver`` group and move the dynamic resolution part to a new
optional ``Dynamic Resolver`` group.
See https://github.com/nexB/scancode.io/pull/1244

- Add a new attribute ``is_direct`` to the DiscoveredDependency model and two new
attributes ``is_private`` and ``is_virtual`` to the DiscoveredPackage model.
Also update the UIs to show these attributes and show the ``package_data`` field
contents for CodebaseResources in the ``extra_data`` tab.
See https://github.com/nexB/scancode.io/pull/1244

- Update scancode-toolkit to version ``32.2.0``. For the complete list of updates
and improvements see https://github.com/nexB/scancode-toolkit/releases/tag/v32.2.0

v34.6.3 (2024-06-21)
--------------------

Expand Down
3 changes: 3 additions & 0 deletions scanpipe/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,8 @@ class Meta:
"source_packages",
"extra_data",
"package_uid",
"is_private",
"is_virtual",
"datasource_ids",
"datafile_paths",
"file_references",
Expand All @@ -409,6 +411,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"dependency_uid",
"for_package_uid",
"resolved_to_package_uid",
Expand Down
8 changes: 8 additions & 0 deletions scanpipe/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,8 @@ class PackageFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
declared_license_expression = django_filters.filters.CharFilter(
widget=HasValueDropdownWidget
)
is_private = StrictBooleanFilter()
is_virtual = StrictBooleanFilter()

class Meta:
model = DiscoveredPackage
Expand Down Expand Up @@ -721,6 +723,8 @@ class Meta:
"is_vulnerable",
"compliance_alert",
"tag",
"is_private",
"is_virtual",
]


Expand All @@ -731,6 +735,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand All @@ -751,6 +756,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"for_package",
"resolved_to_package",
"datafile_resource",
Expand All @@ -765,6 +771,7 @@ class DependencyFilterSet(FilterSetUtilsMixin, django_filters.FilterSet):
is_runtime = StrictBooleanFilter()
is_optional = StrictBooleanFilter()
is_resolved = StrictBooleanFilter()
is_direct = StrictBooleanFilter()
is_vulnerable = IsVulnerable(field_name="affected_by_vulnerabilities")

class Meta:
Expand All @@ -783,6 +790,7 @@ class Meta:
"is_runtime",
"is_optional",
"is_resolved",
"is_direct",
"datasource_id",
"is_vulnerable",
]
Expand Down
79 changes: 79 additions & 0 deletions scanpipe/migrations/0062_dependency_resolver_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Generated by Django 5.0.6 on 2024-06-04 20:48

from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema changes for the dependency resolver update: adds the
    # ``is_direct`` flag on DiscoveredDependency, the ``is_private`` and
    # ``is_virtual`` flags on DiscoveredPackage, sets help texts on the
    # existing dependency flags, and indexes the three new fields.

    dependencies = [
        ("scanpipe", "0061_codebaseresource_is_legal_and_more"),
    ]

    operations = [
        # New boolean flag on DiscoveredDependency.
        migrations.AddField(
            model_name="discovereddependency",
            name="is_direct",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "True if this is a direct, first-level dependency relationship "
                    "for a package."
                ),
            ),
        ),
        # New boolean flags on DiscoveredPackage.
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_private",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "True if this is a private package, either not meant to be "
                    "published on a repository, and/or a local package without a "
                    "name and version used primarily to track dependencies and "
                    "other information."
                ),
            ),
        ),
        migrations.AddField(
            model_name="discoveredpackage",
            name="is_virtual",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "True if this package is created only from a manifest or lockfile, "
                    "and not from its actual packaged code. The files of this package "
                    "are not present in the codebase."
                ),
            ),
        ),
        # Existing DiscoveredDependency flags: only the help_text is set here.
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_optional",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency is an optional dependency",
            ),
        ),
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_resolved",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "True if this dependency version requirement has been pinned "
                    "and this dependency points to an exact version."
                ),
            ),
        ),
        migrations.AlterField(
            model_name="discovereddependency",
            name="is_runtime",
            field=models.BooleanField(
                default=False,
                help_text="True if this dependency is a runtime dependency.",
            ),
        ),
        # Single-column indexes for the three new boolean fields.
        migrations.AddIndex(
            model_name="discovereddependency",
            index=models.Index(
                fields=["is_direct"],
                name="scanpipe_di_is_dire_6dc594_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="discoveredpackage",
            index=models.Index(
                fields=["is_private"],
                name="scanpipe_di_is_priv_9ffd1a_idx",
            ),
        ),
        migrations.AddIndex(
            model_name="discoveredpackage",
            index=models.Index(
                fields=["is_virtual"],
                name="scanpipe_di_is_virt_c5c176_idx",
            ),
        ),
    ]
115 changes: 103 additions & 12 deletions scanpipe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
from licensedcode.cache import get_licensing
from matchcode_toolkit.fingerprinting import IGNORED_DIRECTORY_FINGERPRINTS
from packagedcode.models import build_package_uid
from packagedcode.utils import get_base_purl
from packageurl import PackageURL
from packageurl import normalize_qualifiers
from packageurl.contrib.django.models import PackageURLMixin
Expand Down Expand Up @@ -1031,6 +1032,19 @@ def walk_codebase_path(self):
"""Return files and directories path of the codebase/ directory recursively."""
return self.codebase_path.rglob("*")

def get_resource(self, path):
    """
    Return the codebase resource of this project located at ``path``,
    or None if no resource exists for that path.
    The path is relative to the scan location.
    This is the same as the Codebase.get_resource() function.
    """
    # A missing resource must not raise an exception, since this function
    # is also called from the SCTK side; hence the get_or_none() lookup.
    found = self.codebaseresources.get_or_none(path=path)
    if not found:
        return None
    return found

@cached_property
def can_change_inputs(self):
"""
Expand Down Expand Up @@ -3061,6 +3075,23 @@ class AbstractPackage(models.Model):
blank=True,
help_text=_("A notice text for this package."),
)
is_private = models.BooleanField(
default=False,
help_text=_(
"True if this is a private package, either not meant to be "
"published on a repository, and/or a local package without a "
"name and version used primarily to track dependencies and "
"other information."
),
)
is_virtual = models.BooleanField(
default=False,
help_text=_(
"True if this package is created only from a manifest or lockfile, "
"and not from its actual packaged code. The files of this package "
"are not present in the codebase."
),
)
datasource_ids = models.JSONField(
default=list,
blank=True,
Expand Down Expand Up @@ -3163,6 +3194,8 @@ class Meta:
models.Index(fields=["sha512"]),
models.Index(fields=["compliance_alert"]),
models.Index(fields=["tag"]),
models.Index(fields=["is_private"]),
models.Index(fields=["is_virtual"]),
]
constraints = [
models.UniqueConstraint(
Expand Down Expand Up @@ -3190,15 +3223,7 @@ def purl(self):

@classmethod
def extract_purl_data(cls, package_data):
    """
    Return a mapping of the package URL fields found in ``package_data``,
    normalized by normalize_package_url_data() so it can be used in
    database queries.
    """
    # The normalization loop previously inlined here duplicated the
    # module-level helper and left a second, unreachable return statement;
    # delegating keeps a single implementation of the normalization.
    return normalize_package_url_data(package_data)

@classmethod
def create_from_data(cls, project, package_data):
Expand Down Expand Up @@ -3530,9 +3555,28 @@ class DiscoveredDependency(
"The identifier for the datafile handler used to obtain this dependency."
),
)
is_runtime = models.BooleanField(default=False)
is_optional = models.BooleanField(default=False)
is_resolved = models.BooleanField(default=False)
is_runtime = models.BooleanField(
default=False,
help_text=_("True if this dependency is a runtime dependency."),
)
is_optional = models.BooleanField(
default=False,
help_text=_("True if this dependency is an optional dependency"),
)
is_resolved = models.BooleanField(
default=False,
help_text=_(
"True if this dependency version requirement has been pinned "
"and this dependency points to an exact version."
),
)
is_direct = models.BooleanField(
default=False,
help_text=_(
"True if this is a direct, first-level dependency relationship "
"for a package."
),
)

objects = DiscoveredDependencyQuerySet.as_manager()

Expand All @@ -3553,6 +3597,7 @@ class Meta:
models.Index(fields=["is_runtime"]),
models.Index(fields=["is_optional"]),
models.Index(fields=["is_resolved"]),
models.Index(fields=["is_direct"]),
]
constraints = [
models.UniqueConstraint(
Expand All @@ -3574,6 +3619,10 @@ def get_absolute_url(self):
def purl(self):
return self.package_url

@property
def base_purl(self):
    """
    Return the value computed by get_base_purl() for the ``package_url``
    of this dependency.
    """
    package_url = self.package_url
    return get_base_purl(package_url)

@property
def package_type(self):
return self.type
Expand All @@ -3599,6 +3648,7 @@ def create_from_data(
project,
dependency_data,
for_package=None,
resolved_to_package=None,
datafile_resource=None,
datasource_id=None,
strip_datafile_path_root=False,
Expand Down Expand Up @@ -3638,6 +3688,13 @@ def create_from_data(
package_uid=for_package_uid
)

if not resolved_to_package:
resolved_to_uid = dependency_data.get("resolved_to_uid")
if resolved_to_uid:
resolved_to_package = project.discoveredpackages.get(
package_uid=resolved_to_uid
)

if not datafile_resource:
datafile_path = dependency_data.get("datafile_path")
if datafile_path:
Expand All @@ -3663,10 +3720,25 @@ def create_from_data(
return cls.objects.create(
project=project,
for_package=for_package,
resolved_to_package=resolved_to_package,
datafile_resource=datafile_resource,
**cleaned_data,
)

@classmethod
def extract_purl_data(cls, dependency_data, ignore_nulls=False):
    """
    Parse the "purl" entry of ``dependency_data`` into its package URL
    components and return them normalized by normalize_package_url_data().
    ``ignore_nulls`` is passed through to that helper unchanged.
    """
    purl_string = dependency_data.get("purl")
    parsed_purl = PackageURL.from_string(purl=purl_string)
    purl_fields = parsed_purl.to_dict()
    return normalize_package_url_data(purl_fields, ignore_nulls)

@classmethod
def populate_dependency_uuid(cls, dependency_data):
    """
    Set ``dependency_data["dependency_uid"]`` in place to the "purl" entry
    of ``dependency_data`` extended with a freshly generated ``uuid``
    qualifier.
    """
    package_url = PackageURL.from_string(purl=dependency_data.get("purl"))
    unique_id = str(uuid.uuid4())
    package_url.qualifiers["uuid"] = unique_id
    dependency_uid = package_url.to_string()
    dependency_data["dependency_uid"] = dependency_uid

@property
def spdx_id(self):
return f"SPDXRef-scancodeio-{self._meta.model_name}-{self.dependency_uid}"
Expand Down Expand Up @@ -3694,6 +3766,25 @@ def as_spdx(self):
)


def normalize_package_url_data(purl_mapping, ignore_nulls=False):
    """
    Return a new mapping holding the PURL_FIELDS entries of ``purl_mapping``
    in a normalized form, so database queries with purl data can be executed.

    The "qualifiers" value is passed through normalize_qualifiers() with
    ``encode=True``. By default every field is present in the result and
    empty values are replaced by an empty string. When ``ignore_nulls`` is
    True, fields with an empty value are left out of the result instead.
    """
    normalized = {}
    for field_name in PURL_FIELDS:
        field_value = purl_mapping.get(field_name)
        if field_name == "qualifiers":
            field_value = normalize_qualifiers(field_value, encode=True)

        # Empty values are skipped entirely when nulls are ignored.
        if ignore_nulls and not field_value:
            continue

        normalized[field_name] = field_value or ""

    return normalized


class WebhookSubscription(UUIDPKModel, ProjectRelatedModel):
target_url = models.URLField(_("Target URL"), max_length=1024)
created_date = models.DateTimeField(auto_now_add=True, editable=False)
Expand Down
Loading

0 comments on commit 08c54b1

Please sign in to comment.