Skip to content

Commit

Permalink
Add pipeline to populate PurlDB #720
Browse files Browse the repository at this point in the history
Signed-off-by: Keshav Priyadarshi <[email protected]>
  • Loading branch information
keshav-space committed Jul 4, 2023
1 parent 4007681 commit b7cd512
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 0 deletions.
80 changes: 80 additions & 0 deletions scanpipe/pipelines/populate_purldb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from django.db.models import Q

from scanpipe.models import posix_regex_to_django_regex_lookup
from scanpipe.pipelines import Pipeline
from scanpipe.pipes import purldb


class PopulatePurlDB(Pipeline):
    """
    Populate PurlDB with project packages.
    Ignore PURLs where namespace matches any of the patterns supplied
    under ``ignored_namespaces`` in scancode-config.yml.
    """

    @classmethod
    def steps(cls):
        return (
            cls.populate_purldb_discoveredpackage,
            cls.populate_purldb_discovereddependency,
        )

    @property
    def ignored_namespaces(self):
        """Return the namespace patterns to ignore, or an empty list."""
        # ``or []`` also covers a key that is present but empty (None), which
        # the ``get`` default alone would hand to the caller unchanged.
        return self.env.get("ignored_namespaces", None) or []

    def populate_purldb_discoveredpackage(self):
        """Add DiscoveredPackage to PurlDB."""
        feed_purldb(
            package_object=self.project.discoveredpackages,
            ignored_namespaces=self.ignored_namespaces,
            logger=self.log,
        )

    def populate_purldb_discovereddependency(self):
        """Add DiscoveredDependency to PurlDB."""
        feed_purldb(
            package_object=self.project.discovereddependencies,
            ignored_namespaces=self.ignored_namespaces,
            logger=self.log,
        )


def feed_purldb(package_object, ignored_namespaces, logger):
    """
    Send the entries of ``package_object`` to PurlDB for indexing.

    ``package_object`` must expose a queryset-style ``exclude()``; entries
    whose ``namespace`` matches any of the ``ignored_namespaces`` patterns are
    left out. ``ignored_namespaces`` may be empty or None, in which case
    nothing is ignored. ``logger`` is a callable taking one message string.

    Raise an Exception when PurlDB is not available.
    """
    if not purldb.is_available():
        raise Exception("PurlDB is not configured.")

    # OR all the patterns together so a match on any of them excludes the entry.
    combined_pattern = Q()
    for pattern in ignored_namespaces or ():
        combined_pattern |= Q(
            namespace__regex=posix_regex_to_django_regex_lookup(pattern)
        )

    # Deduplicate once, up front, so the logged count is the number of entries
    # actually sent and no intermediate list is built.
    unique_packages = set(package_object.exclude(combined_pattern))

    logger(f"Populating PurlDB with {len(unique_packages):,d} PURLs")
    # NOTE(review): entries are handed to index_package as-is; presumably they
    # serialize to their PURL string in the request payload -- confirm.
    for purl in unique_packages:
        purldb.index_package(purl)
10 changes: 10 additions & 0 deletions scanpipe/pipes/purldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,13 @@ def match_resource(sha1_list, timeout=None, api_url=PURLDB_API_URL):
if response and response.get("count"):
packages = response["results"]
return packages


def index_package(purl, timeout=None, api_url=PURLDB_API_URL):
    """
    Add PURL to PurlDB for indexing.

    Call ``request_get`` on the ``packages/get_package/`` endpoint under
    ``api_url`` with ``purl`` as the payload, and return its response as-is.
    """
    endpoint = f"{api_url}packages/get_package/"
    return request_get(url=endpoint, payload={"purl": purl}, timeout=timeout)
30 changes: 30 additions & 0 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -897,3 +897,33 @@ def test_scanpipe_deploy_to_develop_pipeline_with_about_file(self):
result_file = output.to_json(project1)
expected_file = data_dir / "expected.json"
self.assertPipelineResultEqual(expected_file, result_file)

@mock.patch("scanpipe.pipes.purldb.request_get")
@mock.patch("scanpipe.pipes.purldb.is_available")
def test_scanpipe_populate_purldb_pipeline_integration_test(
    self, mock_is_available, mock_request_get
):
    # Decorators apply bottom-up, so ``is_available`` is the first mock argument.
    project1 = Project.objects.create(name="Utility: PurlDB")
    scan_file = self.data_location / "asgiref-3.3.0_toolkit_scan.json"
    project1.copy_input_from(scan_file)

    # First load the scan results into the project.
    inventory_run = project1.add_pipeline("load_inventory")
    exitcode, out = inventory_run.make_pipeline_instance().execute()
    self.assertEqual(0, exitcode, msg=out)

    # PurlDB is reported as available and every request returns an empty dict.
    mock_is_available.return_value = True
    mock_request_get.return_value = {}

    run = project1.add_pipeline("populate_purldb")
    exitcode, out = run.make_pipeline_instance().execute()
    self.assertEqual(0, exitcode, msg=out)

    # One log line is expected from each of the two pipeline steps.
    for expected_message in (
        "Populating PurlDB with 2 PURLs",
        "Populating PurlDB with 4 PURLs",
    ):
        self.assertIn(expected_message, run.log)
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ scancodeio_pipelines =
find_vulnerabilities = scanpipe.pipelines.find_vulnerabilities:FindVulnerabilities
inspect_manifest = scanpipe.pipelines.inspect_manifest:InspectManifest
load_inventory = scanpipe.pipelines.load_inventory:LoadInventory
populate_purldb = scanpipe.pipelines.populate_purldb:PopulatePurlDB
root_filesystems = scanpipe.pipelines.root_filesystems:RootFS
scan_codebase = scanpipe.pipelines.scan_codebase:ScanCodebase
scan_package = scanpipe.pipelines.scan_package:ScanPackage
Expand Down

0 comments on commit b7cd512

Please sign in to comment.