diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index c44ced245..efd8b71e9 100644 --- a/vulnerabilities/importers/__init__.py +++ b/vulnerabilities/importers/__init__.py @@ -24,7 +24,6 @@ from vulnerabilities.importers import gitlab from vulnerabilities.importers import istio from vulnerabilities.importers import mozilla -from vulnerabilities.importers import nginx from vulnerabilities.importers import nvd from vulnerabilities.importers import openssl from vulnerabilities.importers import oss_fuzz @@ -40,6 +39,7 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import nginx_importer from vulnerabilities.pipelines import npm_importer from vulnerabilities.pipelines import pypa_importer @@ -47,7 +47,6 @@ nvd.NVDImporter, github.GitHubAPIImporter, gitlab.GitLabAPIImporter, - nginx.NginxImporter, pysec.PyPIImporter, alpine_linux.AlpineImporter, openssl.OpensslImporter, @@ -78,6 +77,7 @@ vulnrichment.VulnrichImporter, pypa_importer.PyPaImporterPipeline, npm_importer.NpmImporterPipeline, + nginx_importer.NginxImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/improvers/valid_versions.py b/vulnerabilities/improvers/valid_versions.py index 32f3dfc35..ecbf2ddd3 100644 --- a/vulnerabilities/improvers/valid_versions.py +++ b/vulnerabilities/improvers/valid_versions.py @@ -36,7 +36,6 @@ from vulnerabilities.importers.github_osv import GithubOSVImporter from vulnerabilities.importers.gitlab import GitLabAPIImporter from vulnerabilities.importers.istio import IstioImporter -from vulnerabilities.importers.nginx import NginxImporter from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter from vulnerabilities.importers.ruby import RubyImporter from vulnerabilities.importers.ubuntu import UbuntuImporter @@ -44,6 +43,8 @@ from vulnerabilities.improver import Improver 
from vulnerabilities.improver import Inference from vulnerabilities.models import Advisory +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage from vulnerabilities.utils import clean_nginx_git_tag @@ -63,6 +64,8 @@ class ValidVersionImprover(Improver): @property def interesting_advisories(self) -> QuerySet: + if issubclass(self.importer, VulnerableCodeBaseImporterPipeline): + return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).paginated() return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated() def get_package_versions( @@ -220,7 +223,7 @@ class NginxBasicImprover(Improver): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=NginxImporter.qualified_name).paginated() + return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).paginated() def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]: all_versions = list(self.fetch_nginx_version_from_git_tags()) diff --git a/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py b/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py new file mode 100644 index 000000000..80b43a954 --- /dev/null +++ b/vulnerabilities/migrations/0065_update_nginx_advisory_created_by.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.15 on 2024-09-23 13:06 + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + pipeline_id = "nginx_importer" # frozen: migrations must not import pipeline code + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").update( + created_by=pipeline_id + ) + + + +def reverse_update_created_by(apps, schema_editor): + pipeline_id = "nginx_importer" # frozen: migrations must not import pipeline code + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=pipeline_id).update( + created_by="vulnerabilities.importers.nginx.NginxImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0064_update_npm_pypa_advisory_created_by"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ada9bec54..cc3e920d9 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1103,7 +1103,7 @@ class Advisory(models.Model): max_length=100, help_text="Fully qualified name of the importer prefixed with the" "module name importing the advisory. Eg:" - "vulnerabilities.importers.nginx.NginxImporter", + "vulnerabilities.pipelines.nginx_importer.NginxImporterPipeline", ) url = models.URLField( blank=True, diff --git a/vulnerabilities/importers/nginx.py b/vulnerabilities/pipelines/nginx_importer.py similarity index 77% rename from vulnerabilities/importers/nginx.py rename to vulnerabilities/pipelines/nginx_importer.py index 4fe0ca6ae..c5e017033 100644 --- a/vulnerabilities/importers/nginx.py +++ b/vulnerabilities/pipelines/nginx_importer.py @@ -3,58 +3,62 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
-# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. # -import logging from typing import Iterable -from typing import List from typing import NamedTuple import requests from bs4 import BeautifulSoup -from django.db.models.query import QuerySet from packageurl import PackageURL from univers.version_range import NginxVersionRange from univers.versions import NginxVersion from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.severity_systems import GENERIC -logger = logging.getLogger(__name__) +class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect Nginx security advisories.""" -class NginxImporter(Importer): - - url = "https://nginx.org/en/security_advisories.html" + pipeline_id = "nginx_importer" spdx_license_expression = "BSD-2-Clause" license_url = "https://nginx.org/LICENSE" + url = "https://nginx.org/en/security_advisories.html" importer_name = "Nginx Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - text = self.fetch() - yield from advisory_data_from_text(text) + @classmethod + def steps(cls): + return ( + cls.fetch, + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) def fetch(self): - return requests.get(self.url).content + self.log(f"Fetch `{self.url}`") + self.advisory_data = requests.get(self.url).text + def advisories_count(self): + return self.advisory_data.count("<li><p>") -def advisory_data_from_text(text): - """ - Yield AdvisoryData from the ``text`` of the nginx security advisories HTML - web page. - """ - soup = BeautifulSoup(text, features="lxml") - vuln_list = soup.select("li p") - for vuln_info in vuln_list: - ngnix_adv = parse_advisory_data_from_paragraph(vuln_info) - yield to_advisory_data(ngnix_adv) + def collect_advisories(self) -> Iterable[AdvisoryData]: + """ + Yield AdvisoryData from nginx security advisories HTML + web page. + """ + soup = BeautifulSoup(self.advisory_data, features="lxml") + vulnerability_list = soup.select("li p") + for vulnerability_info in vulnerability_list: + nginx_advisory = parse_advisory_data_from_paragraph(vulnerability_info) + yield to_advisory_data(nginx_advisory) class NginxAdvisory(NamedTuple): @@ -69,7 +73,7 @@ def to_dict(self): return self._asdict() -def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: +def to_advisory_data(nginx_adv: NginxAdvisory) -> AdvisoryData: """ Return AdvisoryData from an NginxAdvisory tuple.
""" @@ -77,7 +81,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: package_type = "nginx" qualifiers = {} - _, _, affected_version_range = ngnx_adv.vulnerable.partition(":") + _, _, affected_version_range = nginx_adv.vulnerable.partition(":") if "nginx/Windows" in affected_version_range: qualifiers["os"] = "windows" affected_version_range = affected_version_range.replace("nginx/Windows", "") @@ -87,7 +91,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: affected_version_range = NginxVersionRange.from_native(affected_version_range) affected_packages = [] - _, _, fixed_versions = ngnx_adv.not_vulnerable.partition(":") + _, _, fixed_versions = nginx_adv.not_vulnerable.partition(":") for fixed_version in fixed_versions.split(","): fixed_version = fixed_version.rstrip("+") @@ -112,17 +116,17 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData: ) return AdvisoryData( - aliases=ngnx_adv.aliases, - summary=ngnx_adv.summary, + aliases=nginx_adv.aliases, + summary=nginx_adv.summary, affected_packages=affected_packages, - references=ngnx_adv.references, + references=nginx_adv.references, url="https://nginx.org/en/security_advisories.html", ) -def parse_advisory_data_from_paragraph(vuln_info): +def parse_advisory_data_from_paragraph(vulnerability_info): """ - Return an NginxAdvisory from a ``vuln_info`` bs4 paragraph. + Return an NginxAdvisory from a ``vulnerability_info`` bs4 paragraph. 
An advisory paragraph, without html markup, looks like this: @@ -145,7 +149,7 @@ def parse_advisory_data_from_paragraph(vuln_info): # we iterate on the children to accumulate values in variables # FIXME: using an explicit xpath-like query could be simpler - for child in vuln_info.children: + for child in vulnerability_info.children: if is_first: summary = child is_first = False diff --git a/vulnerabilities/pipelines/pypa_importer.py b/vulnerabilities/pipelines/pypa_importer.py index 7a598de4d..29a1283fe 100644 --- a/vulnerabilities/pipelines/pypa_importer.py +++ b/vulnerabilities/pipelines/pypa_importer.py @@ -17,8 +17,6 @@ from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.utils import get_advisory_url -module_logger = logging.getLogger(__name__) - class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline): """Collect advisories from PyPA GitHub repository.""" diff --git a/vulnerabilities/tests/test_nginx.py b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py similarity index 83% rename from vulnerabilities/tests/test_nginx.py rename to vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py index c27ef2d10..8a71a11fd 100644 --- a/vulnerabilities/tests/test_nginx.py +++ b/vulnerabilities/tests/pipelines/test_nginx_importer_pipeline.py @@ -3,7 +3,7 @@ # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 # See http://www.apache.org/licenses/LICENSE-2.0 for the license text. -# See https://github.com/nexB/vulnerablecode for support or download. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. # See https://aboutcode.org for more information about nexB OSS projects. 
# @@ -15,16 +15,16 @@ from bs4 import BeautifulSoup from commoncode import testcase from django.db.models.query import QuerySet +from univers.version_range import NginxVersionRange from vulnerabilities import models from vulnerabilities import severity_systems -from vulnerabilities.import_runner import ImportRunner from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import Reference from vulnerabilities.importer import VulnerabilitySeverity -from vulnerabilities.importers import nginx from vulnerabilities.improvers.valid_versions import NginxBasicImprover from vulnerabilities.models import Advisory +from vulnerabilities.pipelines import nginx_importer from vulnerabilities.tests import util_tests from vulnerabilities.utils import is_vulnerable_nginx_version @@ -40,14 +40,14 @@ class TestNginxImporterAndImprover(testcase.FileBasedTesting): - test_data_dir = str(Path(__file__).resolve().parent / "test_data" / "nginx") + test_data_dir = Path(__file__).parent.parent / "test_data" / "nginx" def test_is_vulnerable(self): # Not vulnerable: 1.17.3+, 1.16.1+ # Vulnerable: 1.9.5-1.17.2 - vcls = nginx.NginxVersionRange.version_class - affected_version_range = nginx.NginxVersionRange.from_native("1.9.5-1.17.2") + vcls = NginxVersionRange.version_class + affected_version_range = NginxVersionRange.from_native("1.9.5-1.17.2") fixed_versions = [vcls("1.17.3"), vcls("1.16.1")] version = vcls("1.9.4") @@ -133,10 +133,10 @@ def test_parse_advisory_data_from_paragraph(self): ], } - result = nginx.parse_advisory_data_from_paragraph(vuln_info) + result = nginx_importer.parse_advisory_data_from_paragraph(vuln_info) assert result.to_dict() == expected - def test_advisory_data_from_text(self): + def test_collect_advisories(self): test_file = self.get_test_loc("security_advisories.html") with open(test_file) as tf: test_text = tf.read() @@ -145,52 +145,49 @@ def test_advisory_data_from_text(self): "security_advisories-advisory_data-expected.json", must_exist=False 
) - results = [na.to_dict() for na in nginx.advisory_data_from_text(test_text)] + test_pipeline = nginx_importer.NginxImporterPipeline() + test_pipeline.advisory_data = test_text + results = [na.to_dict() for na in test_pipeline.collect_advisories()] util_tests.check_results_against_json(results, expected_file) @pytest.mark.django_db(transaction=True) - def test_NginxImporter(self): + def test_NginxImporterPipeline_collect_and_store_advisories(self): + test_file = self.get_test_loc("security_advisories.html") + with open(test_file) as tf: + test_text = tf.read() + + test_pipeline = nginx_importer.NginxImporterPipeline() + test_pipeline.advisory_data = test_text expected_file = self.get_test_loc( "security_advisories-importer-expected.json", must_exist=False ) - results, _cls = self.run_import() - util_tests.check_results_against_json(results, expected_file) + test_pipeline.collect_and_store_advisories() - # run again as there should be no duplicates - results, _cls = self.run_import() + results = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) util_tests.check_results_against_json(results, expected_file) - def run_import(self): - """ - Return a list of imported Advisory model objects and the MockImporter - used. - """ - - class MockImporter(nginx.NginxImporter): - """ - A mocked NginxImporter that loads content from a file rather than - making a network call. 
- """ - - def fetch(self): - with open(test_file) as tf: - return tf.read() - - test_file = self.get_test_loc("security_advisories.html") + # run again as there should be no duplicates + test_pipeline.collect_and_store_advisories() - ImportRunner(MockImporter).run() - return list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)), MockImporter + results = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) + util_tests.check_results_against_json(results, expected_file) @pytest.mark.django_db(transaction=True) def test_NginxBasicImprover__interesting_advisories(self): - advisories, importer_class = self.run_import() + test_file = self.get_test_loc("security_advisories.html") + with open(test_file) as tf: + test_text = tf.read() + + test_pipeline = nginx_importer.NginxImporterPipeline() + test_pipeline.advisory_data = test_text + advisories = list(models.Advisory.objects.all().values(*ADVISORY_FIELDS_TO_TEST)) class MockNginxBasicImprover(NginxBasicImprover): @property def interesting_advisories(self) -> QuerySet: - return Advisory.objects.filter(created_by=importer_class.qualified_name) + return Advisory.objects.filter(created_by=test_pipeline.pipeline_id) improver = MockNginxBasicImprover() interesting_advisories = list( diff --git a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py index fa1360f1d..0bb631012 100644 --- a/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_pypa_importer_pipeline.py @@ -16,7 +16,7 @@ from vulnerabilities.importers.osv import parse_advisory_data from vulnerabilities.tests import util_tests -TEST_DATA = data = Path(__file__).parent.parent / "test_data" / "pypa" +TEST_DATA = Path(__file__).parent.parent / "test_data" / "pypa" class TestPyPaImporterPipeline(TestCase): diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py 
index 6e11bf367..31d05507d 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -681,3 +681,42 @@ def test_removal_of_duped_purls(self): assert adv.filter(created_by="vulnerabilities.importers.npm.NpmImporter").count() == 0 assert adv.filter(created_by="npm_importer").count() == 1 + + +class TestUpdateNginxAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0064_update_npm_pypa_advisory_created_by" + migrate_to = "0065_update_nginx_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="nginx", name="nginx"), + affected_version_range=VersionRange.from_string("vers:nginx/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", + ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.nginx.NginxImporter", + date_collected=timezone.now(), + ) + + def test_update_nginx_advisory_created_by(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert adv.filter(created_by="vulnerabilities.importers.nginx.NginxImporter").count() == 0 + assert adv.filter(created_by="nginx_importer").count() == 1