From ec59d7acb0726c33076367a1a4b085d01f862d3b Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 00:28:06 +0530 Subject: [PATCH 1/9] Add base importer pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- vulnerabilities/management/commands/import.py | 8 ++ vulnerabilities/pipelines/__init__.py | 108 ++++++++++++++++++ .../pypa.py => pipelines/pypa_importer.py} | 0 .../test_pypa_importer_pipeline.py} | 0 vulnerabilities/tests/test_data_source.py | 2 - 6 files changed, 118 insertions(+), 4 deletions(-) rename vulnerabilities/{importers/pypa.py => pipelines/pypa_importer.py} (100%) rename vulnerabilities/tests/{test_pypa.py => pipelines/test_pypa_importer_pipeline.py} (100%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index a1475b715..27fe9c66a 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -30,7 +30,6 @@ from vulnerabilities.importers import oss_fuzz from vulnerabilities.importers import postgresql from vulnerabilities.importers import project_kb_msr2019 -from vulnerabilities.importers import pypa from vulnerabilities.importers import pysec from vulnerabilities.importers import redhat from vulnerabilities.importers import retiredotnet @@ -40,13 +39,13 @@ from vulnerabilities.importers import ubuntu_usn from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen +from vulnerabilities.pipelines import pypa_importer IMPORTERS_REGISTRY = [ nvd.NVDImporter, github.GitHubAPIImporter, gitlab.GitLabAPIImporter, npm.NpmImporter, - pypa.PyPaImporter, nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, @@ -75,6 +74,7 @@ github_osv.GithubOSVImporter, epss.EPSSImporter, vulnrichment.VulnrichImporter, + pypa_importer.PyPaImporterPipeline, ] IMPORTERS_REGISTRY = {x.qualified_name: x for x in IMPORTERS_REGISTRY} diff --git a/vulnerabilities/management/commands/import.py b/vulnerabilities/management/commands/import.py index 5ae885299..36456c8a5 100644 --- a/vulnerabilities/management/commands/import.py +++ b/vulnerabilities/management/commands/import.py @@ -13,6 +13,7 @@ from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importers import IMPORTERS_REGISTRY +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline class Command(BaseCommand): @@ -57,6 +58,13 @@ def import_data(self, importers): for importer in importers: self.stdout.write(f"Importing data using {importer.qualified_name}") + if issubclass(importer, VulnerableCodeBaseImporterPipeline): + status, error = importer().execute() + if status != 0: + self.stdout.write(error) + failed_importers.append(importer.qualified_name) + continue + try: ImportRunner(importer).run() self.stdout.write( diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 38c14a767..ecba8544d 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -9,9 +9,16 @@ import logging from datetime import datetime from datetime import timezone +from traceback import format_exc as traceback_format_exc +from typing import Iterable from aboutcode.pipeline import BasePipeline +from aboutcode.pipeline import LoopProgress +from vulnerabilities import import_runner +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.improvers.default import DefaultImporter +from vulnerabilities.models import Advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -32,3 +39,104 @@ def qualified_name(cls): Fully qualified name prefixed with the module name of the pipeline used in logging. """ return f"{cls.__module__}.{cls.__qualname__}" + + +class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): + """ + Base importer pipeline for importing advisories. + + Uses: + Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` method. + Also override the ``steps`` if needed. + """ + + license_url = None + spdx_license_expression = None + repo_url = None + importer_name = None + + @classmethod + def steps(cls): + return ( + # Add step for downloading/cloning resource as required. + cls.collect_and_store_advisories, + cls.import_new_advisories, + # Add step for removing downloaded/cloned resource as required. + ) + + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData for importer pipeline. + + Populate the `self.collected_advisories_count` field and yield AdvisoryData + """ + raise NotImplementedError + + def advisories_count(self) -> int: + """ + Return the estimated AdvisoryData to be yielded by ``collect_advisories``. + + Used by ``collect_and_store_advisories`` to log the progress of advisory collection. + """ + raise NotImplementedError + + def collect_and_store_advisories(self): + self.new_advisories = [] + + collected_advisory_count = 0 + progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) + for advisory in progress.iter(self.collect_advisories()): + self.insert_advisory(advisory=advisory) + collected_advisory_count += 1 + + self.log(f"Successfully collected {collected_advisory_count:,d} advisories") + + def insert_advisory(self, advisory: AdvisoryData): + try: + obj, created = Advisory.objects.get_or_create( + aliases=advisory.aliases, + summary=advisory.summary, + affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], + references=[ref.to_dict() for ref in advisory.references], + date_published=advisory.date_published, + weaknesses=advisory.weaknesses, + defaults={ + "created_by": self.qualified_name, + "date_collected": datetime.now(timezone.utc), + }, + url=advisory.url, + ) + if created: + self.new_advisories.append(obj) + except Exception as e: + self.log( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + def import_new_advisories(self): + new_advisories_count = len(self.new_advisories) + + imported_advisory_count = 0 + progress = LoopProgress(total_iterations=new_advisories_count, logger=self.log) + for advisory in progress.iter(self.new_advisories): + self.import_advisory(advisory=advisory) + imported_advisory_count += 1 + + self.log(f"Successfully imported {imported_advisory_count:,d} new advisories") + + def import_advisory(self, advisory) -> None: + if advisory.date_imported: + return + try: + advisory_importer = DefaultImporter(advisories=[advisory]) + inferences = advisory_importer.get_inferences(advisory_data=advisory.to_advisory_data()) + import_runner.process_inferences( + inferences=inferences, + advisory=advisory, + improver_name=self.qualified_name, + ) + except Exception as e: + self.log( + f"Failed to process advisory: {advisory!r} with error {e!r}", level=logging.ERROR + ) diff --git a/vulnerabilities/importers/pypa.py b/vulnerabilities/pipelines/pypa_importer.py similarity index 100% rename from vulnerabilities/importers/pypa.py rename to vulnerabilities/pipelines/pypa_importer.py diff --git a/vulnerabilities/tests/test_pypa.py b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py similarity index 100% rename from vulnerabilities/tests/test_pypa.py rename to vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py diff --git a/vulnerabilities/tests/test_data_source.py b/vulnerabilities/tests/test_data_source.py index 7d0a5f707..50f31caaf 100644 --- a/vulnerabilities/tests/test_data_source.py +++ b/vulnerabilities/tests/test_data_source.py @@ -26,7 +26,6 @@ from vulnerabilities.importers.istio import IstioImporter from vulnerabilities.importers.mozilla import MozillaImporter from vulnerabilities.importers.npm import NpmImporter -from vulnerabilities.importers.pypa import PyPaImporter from vulnerabilities.importers.retiredotnet import RetireDotnetImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.oval_parser import OvalParser @@ -124,7 +123,6 @@ def test_git_importer(mock_clone): MozillaImporter, NpmImporter, RetireDotnetImporter, - PyPaImporter, RubyImporter, GithubOSVImporter, ], From 48e85270a404e0636153f39252405f3ab57ebc59 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 00:36:44 +0530 Subject: [PATCH 2/9] Migrate PyPa importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/pypa_importer.py | 74 ++++++++++++---------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index e0648e1c2..f86c7b984 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -7,60 +7,64 @@ # See https://aboutcode.org for more information about nexB OSS projects. # import logging -import os from pathlib import Path from typing import Iterable import saneyaml +from fetchcode.vcs import fetch_via_vcs from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import Importer from vulnerabilities.importers.osv import parse_advisory_data +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_advisory_url -logger = logging.getLogger(__name__) +module_logger = logging.getLogger(__name__) -class PyPaImporter(Importer): - license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" +class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect advisories from PyPA GitHub repository.""" + spdx_license_expression = "CC-BY-4.0" + license_url = "https://github.com/pypa/advisory-database/blob/main/LICENSE" repo_url = "git+https://github.com/pypa/advisory-database" importer_name = "Pypa Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - try: - vcs_response = self.clone(repo_url=self.repo_url) - path = Path(vcs_response.dest_dir) - for advisory_url, raw_data in fork_and_get_files(base_path=path): - yield parse_advisory_data( - raw_data=raw_data, - supported_ecosystems=["pypi"], - advisory_url=advisory_url, - ) - finally: - if self.vcs_response: - self.vcs_response.delete() + @classmethod + def steps(cls): + return ( + cls.clone, + cls.collect_and_store_advisories, + cls.import_new_advisories, + cls.clean_downloads, + ) + def clone(self): + self.log(f"Cloning `{self.repo_url}`") + self.vcs_response = fetch_via_vcs(self.repo_url) -class ForkError(Exception): - pass + def advisories_count(self): + vulns_directory = Path(self.vcs_response.dest_dir) / "vulns" + return sum(1 for _ in vulns_directory.rglob("*.yaml")) + def collect_advisories(self) -> Iterable[AdvisoryData]: + base_directory = Path(self.vcs_response.dest_dir) + vulns_directory = base_directory / "vulns" + self.advisories_count = sum(1 for _ in vulns_directory.rglob("*.yaml")) -def fork_and_get_files(base_path) -> dict: - """ - Yield advisorie data mappings from the PyPA GitHub repository at ``url``. - """ - advisory_dirs = os.path.join(base_path, "vulns") - for root, _, files in os.walk(advisory_dirs): - for file in files: - path = os.path.join(root, file) - if not file.endswith(".yaml"): - logger.warning(f"Unsupported non-YAML PyPA advisory file: {path}") - continue + for advisory in vulns_directory.rglob("*.yaml"): advisory_url = get_advisory_url( - file=Path(path), - base_path=base_path, + file=advisory, + base_path=base_directory, url="https://github.com/pypa/advisory-database/blob/main/", ) - with open(path) as f: - yield advisory_url, saneyaml.load(f.read()) + advisory_dict = saneyaml.load(advisory.read_text()) + yield parse_advisory_data( + raw_data=advisory_dict, + supported_ecosystems=["pypi"], + advisory_url=advisory_url, + ) + + def clean_downloads(self): + if self.vcs_response: + self.log(f"Removing cloned repository") + self.vcs_response.delete() From 29d96d3bab3a24c312643857f0a8f3131082b4f3 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 20 Aug 2024 13:33:37 +0530 Subject: [PATCH 3/9] Add step to import newly collected advisory Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 52 +++---- vulnerabilities/pipelines/pipes/importer.py | 159 ++++++++++++++++++++ 2 files changed, 177 insertions(+), 34 deletions(-) create mode 100644 vulnerabilities/pipelines/pipes/importer.py diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index ecba8544d..9c12d6f26 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -15,10 +15,10 @@ from aboutcode.pipeline import BasePipeline from aboutcode.pipeline import LoopProgress -from vulnerabilities import import_runner from vulnerabilities.importer import AdvisoryData -from vulnerabilities.improvers.default import DefaultImporter -from vulnerabilities.models import Advisory +from vulnerabilities.improver import MAX_CONFIDENCE +from vulnerabilities.pipelines.pipes.importer import import_advisory +from vulnerabilities.pipelines.pipes.importer import insert_advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -47,13 +47,14 @@ class VulnerableCodeBaseImporterPipeline(VulnerableCodePipeline): Uses: Subclass this Pipeline and implement ``advisories_count`` and ``collect_advisories`` method. - Also override the ``steps`` if needed. + Also override the ``steps`` and ``advisory_confidence`` as needed. """ license_url = None spdx_license_expression = None repo_url = None importer_name = None + advisory_confidence = MAX_CONFIDENCE @classmethod def steps(cls): @@ -86,34 +87,17 @@ def collect_and_store_advisories(self): collected_advisory_count = 0 progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) for advisory in progress.iter(self.collect_advisories()): - self.insert_advisory(advisory=advisory) + new_advisory = insert_advisory( + advisory=advisory, + pipeline_name=self.qualified_name, + logger=self.log, + ) + if new_advisory: + self.new_advisories.append(new_advisory) collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") - def insert_advisory(self, advisory: AdvisoryData): - try: - obj, created = Advisory.objects.get_or_create( - aliases=advisory.aliases, - summary=advisory.summary, - affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], - references=[ref.to_dict() for ref in advisory.references], - date_published=advisory.date_published, - weaknesses=advisory.weaknesses, - defaults={ - "created_by": self.qualified_name, - "date_collected": datetime.now(timezone.utc), - }, - url=advisory.url, - ) - if created: - self.new_advisories.append(obj) - except Exception as e: - self.log( - f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", - level=logging.ERROR, - ) - def import_new_advisories(self): new_advisories_count = len(self.new_advisories) @@ -129,14 +113,14 @@ def import_advisory(self, advisory) -> None: if advisory.date_imported: return try: - advisory_importer = DefaultImporter(advisories=[advisory]) - inferences = advisory_importer.get_inferences(advisory_data=advisory.to_advisory_data()) - import_runner.process_inferences( - inferences=inferences, + import_advisory( advisory=advisory, - improver_name=self.qualified_name, + pipeline_name=self.qualified_name, + confidence=self.advisory_confidence, + logger=self.log, ) except Exception as e: self.log( - f"Failed to process advisory: {advisory!r} with error {e!r}", level=logging.ERROR + f"Failed to process advisory: {advisory!r} with error {e!r}", + level=logging.ERROR, ) diff --git a/vulnerabilities/pipelines/pipes/importer.py b/vulnerabilities/pipelines/pipes/importer.py new file mode 100644 index 000000000..ae76381b4 --- /dev/null +++ b/vulnerabilities/pipelines/pipes/importer.py @@ -0,0 +1,159 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/nexB/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import logging +from datetime import datetime +from datetime import timezone +from traceback import format_exc as traceback_format_exc +from typing import Callable + +from django.db import transaction + +from vulnerabilities import import_runner +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.improver import MAX_CONFIDENCE +from vulnerabilities.improvers import default +from vulnerabilities.models import Advisory +from vulnerabilities.models import Package +from vulnerabilities.models import PackageRelatedVulnerability +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference +from vulnerabilities.models import VulnerabilitySeverity +from vulnerabilities.models import Weakness + + +def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable): + try: + obj, created = Advisory.objects.get_or_create( + aliases=advisory.aliases, + summary=advisory.summary, + affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], + references=[ref.to_dict() for ref in advisory.references], + date_published=advisory.date_published, + weaknesses=advisory.weaknesses, + defaults={ + "created_by": pipeline_name, + "date_collected": datetime.now(timezone.utc), + }, + url=advisory.url, + ) + if created: + return obj + except Exception as e: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + +@transaction.atomic +def import_advisory( + advisory: Advisory, + pipeline_name: str, + logger: Callable, + confidence: int = MAX_CONFIDENCE, +): + """ + Create initial Vulnerability Package relationships for the advisory, + including references and severity scores. + + Package relationships are established only for resolved (concrete) versions. + """ + + advisory_data: AdvisoryData = advisory.to_advisory_data() + logger(f"Importing advisory id: {advisory.id}", level=logging.DEBUG) + + affected_purls = [] + fixed_purls = [] + for affected_package in advisory_data.affected_packages: + package_affected_purls, package_fixed_purls = default.get_exact_purls( + affected_package=affected_package + ) + affected_purls.extend(package_affected_purls) + fixed_purls.extend(package_fixed_purls) + + vulnerability = import_runner.get_or_create_vulnerability_and_aliases( + vulnerability_id=None, + aliases=advisory_data.aliases, + summary=advisory_data.summary, + advisory=advisory, + ) + + if not vulnerability: + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + return + + for ref in advisory_data.references: + reference = VulnerabilityReference.objects.get_or_none( + reference_id=ref.reference_id, + url=ref.url, + ) + if not reference: + reference = import_runner.create_valid_vulnerability_reference( + reference_id=ref.reference_id, + url=ref.url, + ) + if not reference: + continue + + VulnerabilityRelatedReference.objects.update_or_create( + reference=reference, + vulnerability=vulnerability, + ) + for severity in ref.severities: + try: + published_at = str(severity.published_at) if severity.published_at else None + _, created = VulnerabilitySeverity.objects.update_or_create( + scoring_system=severity.system.identifier, + reference=reference, + defaults={ + "value": str(severity.value), + "scoring_elements": str(severity.scoring_elements), + "published_at": published_at, + }, + ) + except: + logger( + f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}", + level=logging.ERROR, + ) + if not created: + logger( + f"Severity updated for reference {ref!r} to value: {severity.value!r} " + f"and scoring_elements: {severity.scoring_elements!r}", + level=logging.DEBUG, + ) + + for affected_purl in affected_purls or []: + vulnerable_package, _ = Package.objects.get_or_create_from_purl(purl=affected_purl) + PackageRelatedVulnerability( + vulnerability=vulnerability, + package=vulnerable_package, + created_by=pipeline_name, + confidence=confidence, + fix=False, + ).update_or_create(advisory=advisory) + + for fixed_purl in fixed_purls: + fixed_package, _ = Package.objects.get_or_create_from_purl(purl=fixed_purl) + PackageRelatedVulnerability( + vulnerability=vulnerability, + package=fixed_package, + created_by=pipeline_name, + confidence=confidence, + fix=True, + ).update_or_create(advisory=advisory) + + if advisory_data.weaknesses and vulnerability: + for cwe_id in advisory_data.weaknesses: + cwe_obj, _ = Weakness.objects.get_or_create(cwe_id=cwe_id) + cwe_obj.vulnerabilities.add(vulnerability) + cwe_obj.save() + + advisory.date_imported = datetime.now(timezone.utc) + advisory.save() From 3ea12c3d82f569d3761a35a37508a97e7885e9a6 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 20 Aug 2024 23:27:06 +0530 Subject: [PATCH 4/9] Add test for base and pypa importer pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/import_runner.py | 1 - vulnerabilities/pipelines/__init__.py | 7 ++- .../{pipelines => }/pipes/importer.py | 35 ++++++----- vulnerabilities/tests/__init__.py | 48 +++++++++++++- .../tests/pipelines/test_base_pipeline.py | 63 +++++++++++++++++++ .../pipelines/test_pypa_importer_pipeline.py | 14 +++-- vulnerabilities/tests/pipes/test_importer.py | 30 +++++++++ 7 files changed, 172 insertions(+), 26 deletions(-) rename vulnerabilities/{pipelines => }/pipes/importer.py (84%) create mode 100644 vulnerabilities/tests/pipelines/test_base_pipeline.py create mode 100644 vulnerabilities/tests/pipes/test_importer.py diff --git a/vulnerabilities/import_runner.py b/vulnerabilities/import_runner.py index 4c8e26889..5e5937951 100644 --- a/vulnerabilities/import_runner.py +++ b/vulnerabilities/import_runner.py @@ -18,7 +18,6 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Importer -from vulnerabilities.importers import IMPORTERS_REGISTRY from vulnerabilities.improver import Inference from vulnerabilities.improvers.default import DefaultImporter from vulnerabilities.models import Advisory diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 9c12d6f26..d2f4453d5 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -3,9 +3,10 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + import logging from datetime import datetime from datetime import timezone @@ -17,8 +18,8 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.pipelines.pipes.importer import import_advisory -from vulnerabilities.pipelines.pipes.importer import insert_advisory +from vulnerabilities.pipes.importer import import_advisory +from vulnerabilities.pipes.importer import insert_advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) diff --git a/vulnerabilities/pipelines/pipes/importer.py b/vulnerabilities/pipes/importer.py similarity index 84% rename from vulnerabilities/pipelines/pipes/importer.py rename to vulnerabilities/pipes/importer.py index ae76381b4..a040b4850 100644 --- a/vulnerabilities/pipelines/pipes/importer.py +++ b/vulnerabilities/pipes/importer.py @@ -3,9 +3,10 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + import logging from datetime import datetime from datetime import timezone @@ -14,10 +15,8 @@ from django.db import transaction -from vulnerabilities import import_runner from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.improvers import default from vulnerabilities.models import Advisory from vulnerabilities.models import Package from vulnerabilities.models import PackageRelatedVulnerability @@ -55,8 +54,8 @@ def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable def import_advisory( advisory: Advisory, pipeline_name: str, - logger: Callable, confidence: int = MAX_CONFIDENCE, + logger: Callable = None, ): """ Create initial Vulnerability Package relationships for the advisory, @@ -64,9 +63,12 @@ def import_advisory( Package relationships are established only for resolved (concrete) versions. """ + from vulnerabilities import import_runner + from vulnerabilities.improvers import default advisory_data: AdvisoryData = advisory.to_advisory_data() - logger(f"Importing advisory id: {advisory.id}", level=logging.DEBUG) + if logger: + logger(f"Importing advisory id: {advisory.id}", level=logging.DEBUG) affected_purls = [] fixed_purls = [] @@ -85,7 +87,8 @@ def import_advisory( ) if not vulnerability: - logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) + if logger: + logger(f"Unable to get vulnerability for advisory: {advisory!r}", level=logging.WARNING) return for ref in advisory_data.references: @@ -118,16 +121,18 @@ def import_advisory( }, ) except: - logger( - f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}", - level=logging.ERROR, - ) + if logger: + logger( + f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}", + level=logging.ERROR, + ) if not created: - logger( - f"Severity updated for reference {ref!r} to value: {severity.value!r} " - f"and scoring_elements: {severity.scoring_elements!r}", - level=logging.DEBUG, - ) + if logger: + logger( + f"Severity updated for reference {ref!r} to value: {severity.value!r} " + f"and scoring_elements: {severity.scoring_elements!r}", + level=logging.DEBUG, + ) for affected_purl in affected_purls or []: vulnerable_package, _ = Package.objects.get_or_create_from_purl(purl=affected_purl) diff --git a/vulnerabilities/tests/__init__.py b/vulnerabilities/tests/__init__.py index bdac1cd30..ee106cc74 100644 --- a/vulnerabilities/tests/__init__.py +++ b/vulnerabilities/tests/__init__.py @@ -3,6 +3,52 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange + +from vulnerabilities import models +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference + +advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", +) + + +advisory1 = models.Advisory( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by="tests", + date_collected=timezone.now(), +) + + +def get_all_vulnerability_relationships_objects(): + return { + "vulnerabilities": list(models.Vulnerability.objects.all()), + "aliases": list(models.Alias.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "advisories": list(models.Advisory.objects.all()), + "packages": list(models.Package.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "severity": list(models.VulnerabilitySeverity.objects.all()), + } diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py new file mode 100644 index 000000000..bda0479c0 --- /dev/null +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -0,0 +1,63 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from unittest.mock import patch + +from django.test import TestCase + +from vulnerabilities import models +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.tests import advisory1 +from vulnerabilities.tests import advisory_data1 + + +class TestVulnerableCodeBaseImporterPipeline(TestCase): + @patch.object( + VulnerableCodeBaseImporterPipeline, + "collect_advisories", + return_value=[advisory_data1], + ) + @patch.object( + VulnerableCodeBaseImporterPipeline, + "advisories_count", + return_value=1, + ) + def test_collect_and_store_advisories(self, mock_advisories_count, mock_collect_advisories): + self.assertEqual(0, models.Advisory.objects.count()) + + base_pipeline = VulnerableCodeBaseImporterPipeline() + base_pipeline.collect_and_store_advisories() + + mock_advisories_count.assert_called_once() + mock_collect_advisories.assert_called_once() + + self.assertEqual(1, models.Advisory.objects.count()) + + collected_advisory = models.Advisory.objects.first() + result_aliases = collected_advisory.aliases + expected_aliases = advisory_data1.aliases + + self.assertEqual(expected_aliases, result_aliases) + self.assertEqual(base_pipeline.qualified_name, collected_advisory.created_by) + + def test_import_new_advisories(self): + self.assertEqual(0, models.Vulnerability.objects.count()) + + base_pipeline = VulnerableCodeBaseImporterPipeline() + base_pipeline.new_advisories = [advisory1] + base_pipeline.import_new_advisories() + + self.assertEqual(1, models.Vulnerability.objects.count()) + + imported_vulnerability = models.Vulnerability.objects.first() + + self.assertEqual(1, imported_vulnerability.aliases.count()) + + expected_alias = imported_vulnerability.aliases.first() + self.assertEqual(advisory1.aliases[0], expected_alias.alias) diff --git a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py index 1a59260e6..fa1360f1d 100644 --- a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py @@ -3,10 +3,12 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # + import os +from pathlib import Path from unittest import TestCase import saneyaml @@ -14,14 +16,14 @@ from vulnerabilities.importers.osv import parse_advisory_data from vulnerabilities.tests import util_tests -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data/pypa") +TEST_DATA = data = Path(__file__).parent.parent / "test_data" / "pypa" -class TestPyPaImporter(TestCase): +class TestPyPaImporterPipeline(TestCase): def test_to_advisories_with_summary(self): - with open(os.path.join(TEST_DATA, "pypa_test.yaml")) as f: - mock_response = saneyaml.load(f) + pypa_advisory_path = TEST_DATA / "pypa_test.yaml" + + mock_response = saneyaml.load(pypa_advisory_path.read_text()) expected_file = os.path.join(TEST_DATA, "pypa-expected.json") imported_data = parse_advisory_data( mock_response, diff --git a/vulnerabilities/tests/pipes/test_importer.py b/vulnerabilities/tests/pipes/test_importer.py new file mode 100644 index 000000000..4163009a7 --- /dev/null +++ b/vulnerabilities/tests/pipes/test_importer.py @@ -0,0 +1,30 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +import pytest + +from vulnerabilities.pipes.importer import import_advisory +from vulnerabilities.tests import advisory1 +from vulnerabilities.tests import get_all_vulnerability_relationships_objects + + +@pytest.mark.django_db +def test_vulnerability_pipes_importer_import_advisory(): + import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") + all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() + import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") + assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects() + + +@pytest.mark.django_db +def test_vulnerability_pipes_importer_import_advisory_different_pipelines(): + import_advisory(advisory=advisory1, pipeline_name="test_importer1_pipeline") + all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() + import_advisory(advisory=advisory1, pipeline_name="test_importer2_pipeline") + assert all_vulnerability_relation_objects == get_all_vulnerability_relationships_objects() From bb5c0061107ca0e9778cad7ca87f243280885e30 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 26 Aug 2024 20:50:44 +0530 Subject: [PATCH 5/9] Do not keep new advisories in memory while importing Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 35 ++++++++++--------- .../pipes/{importer.py => advisory.py} | 20 ++++++----- 2 files changed, 29 insertions(+), 26 deletions(-) rename vulnerabilities/pipes/{importer.py => advisory.py} (94%) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index d2f4453d5..3dd1b8e73 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -10,7 +10,6 @@ import logging from datetime import datetime from datetime import timezone -from traceback import format_exc as traceback_format_exc from typing import Iterable from aboutcode.pipeline import BasePipeline @@ -18,8 +17,8 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE -from vulnerabilities.pipes.importer import import_advisory -from vulnerabilities.pipes.importer import insert_advisory +from vulnerabilities.models import Advisory +from vulnerabilities.pipes import advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -83,38 +82,40 @@ def advisories_count(self) -> int: raise NotImplementedError def collect_and_store_advisories(self): - self.new_advisories = [] - collected_advisory_count = 0 progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) for advisory in progress.iter(self.collect_advisories()): - new_advisory = insert_advisory( + if _obj := advisory.insert_advisory( advisory=advisory, pipeline_name=self.qualified_name, logger=self.log, - ) - if new_advisory: - self.new_advisories.append(new_advisory) - collected_advisory_count += 1 + ): + collected_advisory_count += 1 self.log(f"Successfully collected {collected_advisory_count:,d} advisories") def import_new_advisories(self): - new_advisories_count = len(self.new_advisories) + new_advisories = Advisory.objects.filter( + created_by=self.qualified_name, + date_imported__isnull=True, + ) + + new_advisories_count = new_advisories.count() + + self.log(f"Importing {new_advisories_count:,d} new advisories") imported_advisory_count = 0 progress = LoopProgress(total_iterations=new_advisories_count, logger=self.log) - for advisory in progress.iter(self.new_advisories): + for advisory in progress.iter(new_advisories.paginated()): self.import_advisory(advisory=advisory) - imported_advisory_count += 1 + if advisory.date_imported: + imported_advisory_count += 1 self.log(f"Successfully imported {imported_advisory_count:,d} new advisories") - def import_advisory(self, advisory) -> None: - if advisory.date_imported: - return + def import_advisory(self, advisory: Advisory) -> int: try: - import_advisory( + advisory.import_advisory( advisory=advisory, pipeline_name=self.qualified_name, confidence=self.advisory_confidence, diff --git a/vulnerabilities/pipes/importer.py b/vulnerabilities/pipes/advisory.py similarity index 94% rename from vulnerabilities/pipes/importer.py rename to vulnerabilities/pipes/advisory.py index a040b4850..4b264481c 100644 --- a/vulnerabilities/pipes/importer.py +++ b/vulnerabilities/pipes/advisory.py @@ -26,28 +26,30 @@ from vulnerabilities.models import Weakness -def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable): +def insert_advisory(advisory: AdvisoryData, pipeline_name: str, logger: Callable = None): + obj = None try: - obj, created = Advisory.objects.get_or_create( + obj, _ = Advisory.objects.get_or_create( aliases=advisory.aliases, summary=advisory.summary, affected_packages=[pkg.to_dict() for pkg in advisory.affected_packages], references=[ref.to_dict() for ref in advisory.references], date_published=advisory.date_published, weaknesses=advisory.weaknesses, + url=advisory.url, defaults={ "created_by": pipeline_name, "date_collected": datetime.now(timezone.utc), }, - url=advisory.url, ) - if created: - return obj except Exception as e: - logger( - f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", - level=logging.ERROR, - ) + if logger: + logger( + f"Error while processing {advisory!r} with aliases {advisory.aliases!r}: {e!r} \n {traceback_format_exc()}", + level=logging.ERROR, + ) + + return obj @transaction.atomic From ce1ea4c248b07c32350f8158841364d7593d35da Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 26 Aug 2024 21:24:51 +0530 Subject: [PATCH 6/9] Fix failing test Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 7 ++++--- vulnerabilities/tests/__init__.py | 19 ++++++++++--------- .../tests/pipelines/test_base_pipeline.py | 4 ++-- .../{test_importer.py => test_advisory.py} | 6 ++++-- 4 files changed, 20 insertions(+), 16 deletions(-) rename vulnerabilities/tests/pipes/{test_importer.py => test_advisory.py} (84%) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 3dd1b8e73..50ce05432 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -18,7 +18,8 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.improver import MAX_CONFIDENCE from vulnerabilities.models import Advisory -from vulnerabilities.pipes import advisory +from vulnerabilities.pipes.advisory import import_advisory +from vulnerabilities.pipes.advisory import insert_advisory from vulnerabilities.utils import classproperty module_logger = logging.getLogger(__name__) @@ -85,7 +86,7 @@ def collect_and_store_advisories(self): collected_advisory_count = 0 progress = LoopProgress(total_iterations=self.advisories_count(), logger=self.log) for advisory in progress.iter(self.collect_advisories()): - if _obj := advisory.insert_advisory( + if _obj := insert_advisory( advisory=advisory, pipeline_name=self.qualified_name, logger=self.log, @@ -115,7 +116,7 @@ def import_new_advisories(self): def import_advisory(self, advisory: Advisory) -> int: try: - advisory.import_advisory( + import_advisory( advisory=advisory, pipeline_name=self.qualified_name, confidence=self.advisory_confidence, diff --git a/vulnerabilities/tests/__init__.py b/vulnerabilities/tests/__init__.py index ee106cc74..2e6da3cea 100644 --- a/vulnerabilities/tests/__init__.py +++ b/vulnerabilities/tests/__init__.py @@ -31,15 +31,16 @@ ) -advisory1 = models.Advisory( - aliases=advisory_data1.aliases, - summary=advisory_data1.summary, - affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], - references=[ref.to_dict() for ref in advisory_data1.references], - url=advisory_data1.url, - created_by="tests", - date_collected=timezone.now(), -) +def get_advisory1(created_by="test_pipeline"): + return models.Advisory.objects.create( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by=created_by, + date_collected=timezone.now(), + ) def get_all_vulnerability_relationships_objects(): diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py index bda0479c0..3d747b421 100644 --- a/vulnerabilities/tests/pipelines/test_base_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -13,8 +13,8 @@ from vulnerabilities import models from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.tests import advisory1 from vulnerabilities.tests import advisory_data1 +from vulnerabilities.tests import get_advisory1 class TestVulnerableCodeBaseImporterPipeline(TestCase): @@ -50,7 +50,7 @@ def test_import_new_advisories(self): self.assertEqual(0, models.Vulnerability.objects.count()) base_pipeline = VulnerableCodeBaseImporterPipeline() - base_pipeline.new_advisories = [advisory1] + advisory1 = get_advisory1(created_by=base_pipeline.qualified_name) base_pipeline.import_new_advisories() self.assertEqual(1, models.Vulnerability.objects.count()) diff --git a/vulnerabilities/tests/pipes/test_importer.py b/vulnerabilities/tests/pipes/test_advisory.py similarity index 84% rename from vulnerabilities/tests/pipes/test_importer.py rename to vulnerabilities/tests/pipes/test_advisory.py index 4163009a7..8377a0b81 100644 --- a/vulnerabilities/tests/pipes/test_importer.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -9,13 +9,14 @@ import pytest -from vulnerabilities.pipes.importer import import_advisory -from vulnerabilities.tests import advisory1 +from vulnerabilities.pipes.advisory import import_advisory +from vulnerabilities.tests import get_advisory1 from vulnerabilities.tests import get_all_vulnerability_relationships_objects @pytest.mark.django_db def test_vulnerability_pipes_importer_import_advisory(): + advisory1 = get_advisory1(created_by="test_importer_pipeline") import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() import_advisory(advisory=advisory1, pipeline_name="test_importer_pipeline") @@ -24,6 +25,7 @@ def test_vulnerability_pipes_importer_import_advisory(): @pytest.mark.django_db def test_vulnerability_pipes_importer_import_advisory_different_pipelines(): + advisory1 = get_advisory1(created_by="test_importer_pipeline") import_advisory(advisory=advisory1, pipeline_name="test_importer1_pipeline") all_vulnerability_relation_objects = get_all_vulnerability_relationships_objects() import_advisory(advisory=advisory1, pipeline_name="test_importer2_pipeline") From 6b8b97808f93757992f6c8d673a455c266cc6426 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 01:18:04 +0530 Subject: [PATCH 7/9] Add docstring for get_advisory_url Signed-off-by: Keshav Priyadarshi --- vulnerabilities/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vulnerabilities/utils.py b/vulnerabilities/utils.py index c6874b7df..54b318101 100644 --- a/vulnerabilities/utils.py +++ b/vulnerabilities/utils.py @@ -559,6 +559,9 @@ def get_importer_name(advisory): def get_advisory_url(file, base_path, url): + """ + Return the advisory URL constructed by combining the base URL with the relative file path. + """ relative_path = str(file.relative_to(base_path)).strip("/") advisory_url = urljoin(url, relative_path) return advisory_url From 1c39cc199bbdf53cce83cbf92f699f1c7cc66abe Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 01:19:28 +0530 Subject: [PATCH 8/9] Inline test fixtures Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/__init__.py | 47 ------------------- .../tests/pipelines/test_base_pipeline.py | 34 +++++++++++++- vulnerabilities/tests/pipes/test_advisory.py | 47 ++++++++++++++++++- 3 files changed, 77 insertions(+), 51 deletions(-) diff --git a/vulnerabilities/tests/__init__.py b/vulnerabilities/tests/__init__.py index 2e6da3cea..20854f2ad 100644 --- a/vulnerabilities/tests/__init__.py +++ b/vulnerabilities/tests/__init__.py @@ -6,50 +6,3 @@ # See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # - -from django.utils import timezone -from packageurl import PackageURL -from univers.version_range import VersionRange - -from vulnerabilities import models -from vulnerabilities.importer import AdvisoryData -from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Reference - -advisory_data1 = AdvisoryData( - aliases=["CVE-2020-13371337"], - summary="vulnerability description here", - affected_packages=[ - AffectedPackage( - package=PackageURL(type="pypi", name="dummy"), - affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), - ) - ], - references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], - date_published=timezone.now(), - url="https://test.com", -) - - -def get_advisory1(created_by="test_pipeline"): - return models.Advisory.objects.create( - aliases=advisory_data1.aliases, - summary=advisory_data1.summary, - affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], - references=[ref.to_dict() for ref in advisory_data1.references], - url=advisory_data1.url, - created_by=created_by, - date_collected=timezone.now(), - ) - - -def get_all_vulnerability_relationships_objects(): - return { - "vulnerabilities": list(models.Vulnerability.objects.all()), - "aliases": list(models.Alias.objects.all()), - "references": list(models.VulnerabilityReference.objects.all()), - "advisories": list(models.Advisory.objects.all()), - "packages": list(models.Package.objects.all()), - "references": list(models.VulnerabilityReference.objects.all()), - "severity": list(models.VulnerabilitySeverity.objects.all()), - } diff --git a/vulnerabilities/tests/pipelines/test_base_pipeline.py b/vulnerabilities/tests/pipelines/test_base_pipeline.py index 3d747b421..ea2e36a33 100644 --- a/vulnerabilities/tests/pipelines/test_base_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_base_pipeline.py @@ -10,11 +10,41 @@ from unittest.mock import patch from django.test import TestCase +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange from vulnerabilities import models +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline -from vulnerabilities.tests import advisory_data1 -from vulnerabilities.tests import get_advisory1 + +advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", +) + + +def get_advisory1(created_by="test_pipeline"): + return models.Advisory.objects.create( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by=created_by, + date_collected=timezone.now(), + ) class TestVulnerableCodeBaseImporterPipeline(TestCase): diff --git a/vulnerabilities/tests/pipes/test_advisory.py b/vulnerabilities/tests/pipes/test_advisory.py index 8377a0b81..67ab5046b 100644 --- a/vulnerabilities/tests/pipes/test_advisory.py +++ b/vulnerabilities/tests/pipes/test_advisory.py @@ -8,10 +8,53 @@ # import pytest +from django.utils import timezone +from packageurl import PackageURL +from univers.version_range import VersionRange +from vulnerabilities import models +from vulnerabilities.importer import AdvisoryData +from vulnerabilities.importer import AffectedPackage +from vulnerabilities.importer import Reference from vulnerabilities.pipes.advisory import import_advisory -from vulnerabilities.tests import get_advisory1 -from vulnerabilities.tests import get_all_vulnerability_relationships_objects + +advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="dummy"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", +) + + +def get_advisory1(created_by="test_pipeline"): + return models.Advisory.objects.create( + aliases=advisory_data1.aliases, + summary=advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in advisory_data1.affected_packages], + references=[ref.to_dict() for ref in advisory_data1.references], + url=advisory_data1.url, + created_by=created_by, + date_collected=timezone.now(), + ) + + +def get_all_vulnerability_relationships_objects(): + return { + "vulnerabilities": list(models.Vulnerability.objects.all()), + "aliases": list(models.Alias.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "advisories": list(models.Advisory.objects.all()), + "packages": list(models.Package.objects.all()), + "references": list(models.VulnerabilityReference.objects.all()), + "severity": list(models.VulnerabilitySeverity.objects.all()), + } @pytest.mark.django_db From d73cfd49dd1055a3393747a8c83d6cfdcf469b3a Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Tue, 27 Aug 2024 01:21:08 +0530 Subject: [PATCH 9/9] Log the full stack trace on error Signed-off-by: Keshav Priyadarshi --- vulnerabilities/pipelines/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/__init__.py b/vulnerabilities/pipelines/__init__.py index 50ce05432..13bd0033d 100644 --- a/vulnerabilities/pipelines/__init__.py +++ b/vulnerabilities/pipelines/__init__.py @@ -10,6 +10,7 @@ import logging from datetime import datetime from datetime import timezone +from traceback import format_exc as traceback_format_exc from typing import Iterable from aboutcode.pipeline import BasePipeline @@ -124,6 +125,6 @@ def import_advisory(self, advisory: Advisory) -> int: ) except Exception as e: self.log( - f"Failed to process advisory: {advisory!r} with error {e!r}", + f"Failed to import advisory: {advisory!r} with error {e!r}:\n{traceback_format_exc()}", level=logging.ERROR, )