Skip to content

Commit

Permalink
Use pipeline_id for created_by field
Browse files Browse the repository at this point in the history
- For now pipeline_id should be module name of pipeline

Signed-off-by: Keshav Priyadarshi <[email protected]>
  • Loading branch information
keshav-space committed Sep 13, 2024
1 parent df17977 commit 32a34f4
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 26 deletions.
6 changes: 5 additions & 1 deletion vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
from vulnerabilities.importers import ubuntu_usn
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import nginx_importer
from vulnerabilities.pipelines import pypa_importer

Expand Down Expand Up @@ -79,4 +80,7 @@
nginx_importer.NginxImporterPipeline,
]

IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY}
IMPORTERS_REGISTRY = {
x.pipeline_id if issubclass(x, VulnerableCodeBaseImporterPipeline) else x.qualified_name: x
for x in IMPORTERS_REGISTRY
}
6 changes: 5 additions & 1 deletion vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from vulnerabilities.improvers import valid_versions
from vulnerabilities.improvers import vulnerability_kev
from vulnerabilities.improvers import vulnerability_status
from vulnerabilities.pipelines import VulnerableCodePipeline
from vulnerabilities.pipelines import flag_ghost_packages

IMPROVERS_REGISTRY = [
Expand All @@ -34,4 +35,7 @@
flag_ghost_packages.FlagGhostPackagePipeline,
]

IMPROVERS_REGISTRY = {x.qualified_name: x for x in IMPROVERS_REGISTRY}
IMPROVERS_REGISTRY = {
x.pipeline_id if issubclass(x, VulnerableCodePipeline) else x.qualified_name: x
for x in IMPROVERS_REGISTRY
}
5 changes: 3 additions & 2 deletions vulnerabilities/management/commands/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,15 @@ def import_data(self, importers):
failed_importers = []

for importer in importers:
self.stdout.write(f"Importing data using {importer.qualified_name}")
if issubclass(importer, VulnerableCodeBaseImporterPipeline):
self.stdout.write(f"Importing data using {importer.pipeline_id}")
status, error = importer().execute()
if status != 0:
self.stdout.write(error)
failed_importers.append(importer.qualified_name)
failed_importers.append(importer.pipeline_id)
continue

self.stdout.write(f"Importing data using {importer.qualified_name}")
try:
ImportRunner(importer).run()
self.stdout.write(
Expand Down
5 changes: 3 additions & 2 deletions vulnerabilities/management/commands/improve.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,15 @@ def improve_data(self, improvers):
failed_improvers = []

for improver in improvers:
self.stdout.write(f"Improving data using {improver.qualified_name}")
if issubclass(improver, VulnerableCodePipeline):
self.stdout.write(f"Improving data using {improver.pipeline_id}")
status, error = improver().execute()
if status != 0:
self.stdout.write(error)
failed_improvers.append(improver.qualified_name)
failed_improvers.append(improver.pipeline_id)
continue

self.stdout.write(f"Improving data using {improver.qualified_name}")
try:
ImproveRunner(improver_class=improver).run()
self.stdout.write(
Expand Down
20 changes: 12 additions & 8 deletions vulnerabilities/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@


class VulnerableCodePipeline(BasePipeline):
pipeline_id = None # Unique Pipeline ID

def log(self, message, level=logging.INFO):
"""Log the given `message` to the current module logger and execution_log."""
now_local = datetime.now(timezone.utc).astimezone()
Expand All @@ -36,11 +38,12 @@ def log(self, message, level=logging.INFO):
self.append_to_log(message)

@classproperty
def qualified_name(cls):
"""
Fully qualified name prefixed with the module name of the pipeline used in logging.
"""
return f"{cls.__module__}.{cls.__qualname__}"
def pipeline_id(cls):
"""Return unique pipeline_id set in cls.pipeline_id"""

if cls.pipeline_id is None or cls.pipeline_id == "":
raise NotImplementedError("pipeline_id is not defined or is empty")
return cls.pipeline_id


class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
Expand All @@ -52,6 +55,7 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline):
Also override the ``steps`` and ``advisory_confidence`` as needed.
"""

pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module.
license_url = None
spdx_license_expression = None
repo_url = None
Expand Down Expand Up @@ -89,7 +93,7 @@ def collect_and_store_advisories(self):
for advisory in progress.iter(self.collect_advisories()):
if _obj := insert_advisory(
advisory=advisory,
pipeline_name=self.qualified_name,
pipeline_id=self.pipeline_id,
logger=self.log,
):
collected_advisory_count += 1
Expand All @@ -98,7 +102,7 @@ def collect_and_store_advisories(self):

def import_new_advisories(self):
new_advisories = Advisory.objects.filter(
created_by=self.qualified_name,
created_by=self.pipeline_id,
date_imported__isnull=True,
)

Expand All @@ -119,7 +123,7 @@ def import_advisory(self, advisory: Advisory) -> int:
try:
import_advisory(
advisory=advisory,
pipeline_name=self.qualified_name,
pipeline_id=self.pipeline_id,
confidence=self.advisory_confidence,
logger=self.log,
)
Expand Down
2 changes: 2 additions & 0 deletions vulnerabilities/pipelines/flag_ghost_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
class FlagGhostPackagePipeline(VulnerableCodePipeline):
"""Detect and flag packages that do not exist upstream."""

pipeline_id = "flag_ghost_packages"

@classmethod
def steps(cls):
return (cls.flag_ghost_packages,)
Expand Down
5 changes: 4 additions & 1 deletion vulnerabilities/pipelines/pypa_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,15 @@
from fetchcode.vcs import fetch_via_vcs

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importers.osv import parse_advisory_data
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.utils import get_advisory_url


class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect advisories from PyPA GitHub repository."""

pipeline_id = "pypa_importer"

spdx_license_expression = "CC-BY-4.0"
license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE"
repo_url = "git+https://github.com/pypa/advisory-database"
Expand All @@ -45,6 +46,8 @@ def advisories_count(self):
return sum(1 for _ in vulns_directory.rglob("*.yaml"))

def collect_advisories(self) -> Iterable[AdvisoryData]:
from vulnerabilities.importers.osv import parse_advisory_data

base_directory = Path(self.vcs_response.dest_dir)
vulns_directory = base_directory / "vulns"
self.advisories_count = sum(1 for _ in vulns_directory.rglob("*.yaml"))
Expand Down
10 changes: 5 additions & 5 deletions vulnerabilities/pipes/advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from vulnerabilities.models import Weakness


def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable = None):
def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable = None):
obj = None
try:
obj, _ = Advisory.objects.get_or_create(
Expand All @@ -38,7 +38,7 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable
weaknesses=advisory.weaknesses,
url=advisory.url,
defaults={
"created_by": pipeline_name,
"created_by": pipeline_id,
"date_collected": datetime.now(timezone.utc),
},
)
Expand All @@ -55,7 +55,7 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable
@transaction.atomic
def import_advisory(
advisory: Advisory,
pipeline_name: str,
pipeline_id: str,
confidence: int = MAX_CONFIDENCE,
logger: Callable = None,
):
Expand Down Expand Up @@ -141,7 +141,7 @@ def import_advisory(
PackageRelatedVulnerability(
vulnerability=vulnerability,
package=vulnerable_package,
created_by=pipeline_name,
created_by=pipeline_id,
confidence=confidence,
fix=False,
).update_or_create(advisory=advisory)
Expand All @@ -151,7 +151,7 @@ def import_advisory(
PackageRelatedVulnerability(
vulnerability=vulnerability,
package=fixed_package,
created_by=pipeline_name,
created_by=pipeline_id,
confidence=confidence,
fix=True,
).update_or_create(advisory=advisory)
Expand Down
7 changes: 5 additions & 2 deletions vulnerabilities/tests/pipelines/test_base_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ def test_collect_and_store_advisories(self, mock_advisories_count, mock_collect_
self.assertEqual(0, models.Advisory.objects.count())

base_pipeline = VulnerableCodeBaseImporterPipeline()
base_pipeline.pipeline_id = "test_pipeline"

base_pipeline.collect_and_store_advisories()

mock_advisories_count.assert_called_once()
Expand All @@ -74,13 +76,14 @@ def test_collect_and_store_advisories(self, mock_advisories_count, mock_collect_
expected_aliases = advisory_data1.aliases

self.assertEqual(expected_aliases, result_aliases)
self.assertEqual(base_pipeline.qualified_name, collected_advisory.created_by)
self.assertEqual(base_pipeline.pipeline_id, collected_advisory.created_by)

def test_import_new_advisories(self):
self.assertEqual(0, models.Vulnerability.objects.count())

base_pipeline = VulnerableCodeBaseImporterPipeline()
advisory1 = get_advisory1(created_by=base_pipeline.qualified_name)
base_pipeline.pipeline_id = "test_pipeline"
advisory1 = get_advisory1()
base_pipeline.import_new_advisories()

self.assertEqual(1, models.Vulnerability.objects.count())
Expand Down
66 changes: 66 additions & 0 deletions vulnerabilities/tests/pipelines/test_pipeline_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import importlib
import inspect
import unittest
from pathlib import Path

from vulnerabilities.pipelines import VulnerableCodePipeline

PIPELINE_DIR = Path(__file__).parent.parent.parent / "pipelines"


class PipelineTests(unittest.TestCase):
def setUp(self):
self.pipeline_dict = self.collect_pipeline_ids()

def collect_pipeline_ids(self):
"""Return pipeline_ids from all the VulnerableCodePipeline."""
pipeline_dict = {}

for pipeline in PIPELINE_DIR.glob("*.py"):
if pipeline.name == "__init__.py":
continue

module_name = pipeline.stem
module = importlib.import_module(f"vulnerabilities.pipelines.{module_name}")

for _, obj in inspect.getmembers(module, inspect.isclass):
if issubclass(obj, VulnerableCodePipeline) and obj is not VulnerableCodePipeline:
pipeline_id = obj.pipeline_id
pipeline_dict[obj] = pipeline_id
break

return pipeline_dict

def test_no_empty_pipeline_ids(self):
empty_pipeline_ids = [cls for cls, pid in self.pipeline_dict.items() if pid == ""]

if empty_pipeline_ids:
error_messages = [
f"{cls.__name__} has empty pipeline_id." for cls in empty_pipeline_ids
]
error_message = "`pipeline_id` should not be empty string:\n" + "\n".join(
error_messages
)
assert False, error_message

def test_no_none_pipeline_ids(self):
none_pipeline_ids = [cls for cls, pid in self.pipeline_dict.items() if pid == None]

if none_pipeline_ids:
error_messages = [f"{cls.__name__} has None pipeline_id." for cls in none_pipeline_ids]
error_message = "`pipeline_id` should not be None:\n" + "\n".join(error_messages)
assert False, error_message

def test_unique_pipeline_ids(self):
pipeline_ids = self.pipeline_dict.values()
unique_ids = set(pipeline_ids)
assert len(pipeline_ids) == len(unique_ids), "`pipeline_id` should be unique."
8 changes: 4 additions & 4 deletions vulnerabilities/tests/pipes/test_advisory.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,16 +60,16 @@ def get_all_vulnerability_relationships_objects():
@pytest.mark.django_db
def test_vulnerability_pipes_importer_import_advisory():
advisory1 = get_advisory1(created_by="test_importer_pipeline")
import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline")
import_advisory(advisory=advisory1, pipeline_id="test_importer_pipeline")
all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects()
import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline")
import_advisory(advisory=advisory1, pipeline_id="test_importer_pipeline")
assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects()


@pytest.mark.django_db
def test_vulnerability_pipes_importer_import_advisory_different_pipelines():
advisory1 = get_advisory1(created_by="test_importer_pipeline")
import_advisory(advisory=advisory1, pipeline_name="test_importer1_pipeline")
import_advisory(advisory=advisory1, pipeline_id="test_importer1_pipeline")
all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects()
import_advisory(advisory=advisory1, pipeline_name="test_importer2_pipeline")
import_advisory(advisory=advisory1, pipeline_id="test_importer2_pipeline")
assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects()

0 comments on commit 32a34f4

Please sign in to comment.