Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate Nginx importer to aboutcode pipeline #1575

Merged
merged 4 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions vulnerabilities/importers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
from vulnerabilities.importers import gitlab
from vulnerabilities.importers import istio
from vulnerabilities.importers import mozilla
from vulnerabilities.importers import nginx
from vulnerabilities.importers import nvd
from vulnerabilities.importers import openssl
from vulnerabilities.importers import oss_fuzz
Expand All @@ -40,14 +39,14 @@
from vulnerabilities.importers import vulnrichment
from vulnerabilities.importers import xen
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines import nginx_importer
from vulnerabilities.pipelines import npm_importer
from vulnerabilities.pipelines import pypa_importer

IMPORTERS_REGISTRY = [
nvd.NVDImporter,
github.GitHubAPIImporter,
gitlab.GitLabAPIImporter,
nginx.NginxImporter,
pysec.PyPIImporter,
alpine_linux.AlpineImporter,
openssl.OpensslImporter,
Expand Down Expand Up @@ -78,6 +77,7 @@
vulnrichment.VulnrichImporter,
pypa_importer.PyPaImporterPipeline,
npm_importer.NpmImporterPipeline,
nginx_importer.NginxImporterPipeline,
]

IMPORTERS_REGISTRY = {
Expand Down
7 changes: 5 additions & 2 deletions vulnerabilities/improvers/valid_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,15 @@
from vulnerabilities.importers.github_osv import GithubOSVImporter
from vulnerabilities.importers.gitlab import GitLabAPIImporter
from vulnerabilities.importers.istio import IstioImporter
from vulnerabilities.importers.nginx import NginxImporter
from vulnerabilities.importers.oss_fuzz import OSSFuzzImporter
from vulnerabilities.importers.ruby import RubyImporter
from vulnerabilities.importers.ubuntu import UbuntuImporter
from vulnerabilities.improver import MAX_CONFIDENCE
from vulnerabilities.improver import Improver
from vulnerabilities.improver import Inference
from vulnerabilities.models import Advisory
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.pipelines.nginx_importer import NginxImporterPipeline
from vulnerabilities.pipelines.npm_importer import NpmImporterPipeline
from vulnerabilities.utils import AffectedPackage as LegacyAffectedPackage
from vulnerabilities.utils import clean_nginx_git_tag
Expand All @@ -63,6 +64,8 @@ class ValidVersionImprover(Improver):

@property
def interesting_advisories(self) -> QuerySet:
if issubclass(self.importer, VulnerableCodeBaseImporterPipeline):
return Advisory.objects.filter(Q(created_by=self.importer.pipeline_id)).paginated()
return Advisory.objects.filter(Q(created_by=self.importer.qualified_name)).paginated()

def get_package_versions(
Expand Down Expand Up @@ -220,7 +223,7 @@ class NginxBasicImprover(Improver):

@property
def interesting_advisories(self) -> QuerySet:
return Advisory.objects.filter(created_by=NginxImporter.qualified_name).paginated()
return Advisory.objects.filter(created_by=NginxImporterPipeline.pipeline_id).paginated()

def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:
all_versions = list(self.fetch_nginx_version_from_git_tags())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generated by Django 4.2.15 on 2024-09-23 13:06

from django.db import migrations

"""
Update the created_by field on Advisory from the old qualified_name
to the new pipeline_id.
"""


def update_created_by(apps, schema_editor):
    """
    Point existing nginx advisories at the new pipeline identifier.

    Rewrite ``Advisory.created_by`` from the legacy importer qualified name
    to the ``pipeline_id`` of the nginx importer pipeline.

    The identifiers are frozen here as literals instead of being imported
    from ``vulnerabilities.pipelines.nginx_importer`` so that this migration
    keeps working unchanged if that module is later renamed, moved or removed.

    ``apps`` is the app registry used to look up the ``Advisory`` model and
    ``schema_editor`` is unused; both are supplied by ``migrations.RunPython``.
    """
    legacy_qualified_name = "vulnerabilities.importers.nginx.NginxImporter"
    # Value of NginxImporterPipeline.pipeline_id at the time of this migration.
    pipeline_id = "nginx_importer"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=legacy_qualified_name).update(created_by=pipeline_id)



def reverse_update_created_by(apps, schema_editor):
    """
    Restore the legacy importer qualified name on nginx advisories.

    Rewrite ``Advisory.created_by`` from the nginx importer ``pipeline_id``
    back to the qualified name of the old importer class, so the forward
    data migration can be rolled back.

    The identifiers are frozen here as literals instead of being imported
    from ``vulnerabilities.pipelines.nginx_importer`` so that this migration
    keeps working unchanged if that module is later renamed, moved or removed.

    ``apps`` is the app registry used to look up the ``Advisory`` model and
    ``schema_editor`` is unused; both are supplied by ``migrations.RunPython``.
    """
    # Value of NginxImporterPipeline.pipeline_id at the time of this migration.
    pipeline_id = "nginx_importer"
    legacy_qualified_name = "vulnerabilities.importers.nginx.NginxImporter"

    Advisory = apps.get_model("vulnerabilities", "Advisory")
    Advisory.objects.filter(created_by=pipeline_id).update(created_by=legacy_qualified_name)


class Migration(migrations.Migration):
    """
    Data migration that runs ``update_created_by`` when applied and
    ``reverse_update_created_by`` when unapplied.
    """

    # Must run after the equivalent npm/pypa ``created_by`` data migration.
    dependencies = [("vulnerabilities", "0064_update_npm_pypa_advisory_created_by")]

    operations = [
        migrations.RunPython(
            code=update_created_by,
            reverse_code=reverse_update_created_by,
        ),
    ]
2 changes: 1 addition & 1 deletion vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ class Advisory(models.Model):
max_length=100,
help_text="Fully qualified name of the importer prefixed with the"
"module name importing the advisory. Eg:"
"vulnerabilities.importers.nginx.NginxImporter",
"vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline",
)
url = models.URLField(
blank=True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,58 +3,62 @@
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/nexB/vulnerablecode for support or download.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
from typing import Iterable
from typing import List
from typing import NamedTuple

import requests
from bs4 import BeautifulSoup
from django.db.models.query import QuerySet
from packageurl import PackageURL
from univers.version_range import NginxVersionRange
from univers.versions import NginxVersion

from vulnerabilities.importer import AdvisoryData
from vulnerabilities.importer import AffectedPackage
from vulnerabilities.importer import Importer
from vulnerabilities.importer import Reference
from vulnerabilities.importer import VulnerabilitySeverity
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.severity_systems import GENERIC

logger = logging.getLogger(__name__)

class NginxImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect Nginx security advisories."""

class NginxImporter(Importer):

url = "https://nginx.org/en/security_advisories.html"
pipeline_id = "nginx_importer"

spdx_license_expression = "BSD-2-Clause"
license_url = "https://nginx.org/LICENSE"
url = "https://nginx.org/en/security_advisories.html"
importer_name = "Nginx Importer"

def advisory_data(self) -> Iterable[AdvisoryData]:
text = self.fetch()
yield from advisory_data_from_text(text)
@classmethod
def steps(cls):
return (
cls.fetch,
cls.collect_and_store_advisories,
cls.import_new_advisories,
)

def fetch(self):
return requests.get(self.url).content
self.log(f"Fetch `{self.url}`")
self.advisory_data = requests.get(self.url).text

def advisories_count(self):
return self.advisory_data.count("<li><p>")

def advisory_data_from_text(text):
"""
Yield AdvisoryData from the ``text`` of the nginx security advisories HTML
web page.
"""
soup = BeautifulSoup(text, features="lxml")
vuln_list = soup.select("li p")
for vuln_info in vuln_list:
ngnix_adv = parse_advisory_data_from_paragraph(vuln_info)
yield to_advisory_data(ngnix_adv)
def collect_advisories(self) -> Iterable[AdvisoryData]:
"""
Yield AdvisoryData from nginx security advisories HTML
web page.
"""
soup = BeautifulSoup(self.advisory_data, features="lxml")
vulnerability_list = soup.select("li p")
for vulnerability_info in vulnerability_list:
ngnix_advisory = parse_advisory_data_from_paragraph(vulnerability_info)
yield to_advisory_data(ngnix_advisory)


class NginxAdvisory(NamedTuple):
Expand All @@ -69,15 +73,15 @@ def to_dict(self):
return self._asdict()


def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
def to_advisory_data(nginx_adv: NginxAdvisory) -> AdvisoryData:
"""
Return AdvisoryData from an NginxAdvisory tuple.
"""
package_name = "nginx"
package_type = "nginx"
qualifiers = {}

_, _, affected_version_range = ngnx_adv.vulnerable.partition(":")
_, _, affected_version_range = nginx_adv.vulnerable.partition(":")
if "nginx/Windows" in affected_version_range:
qualifiers["os"] = "windows"
affected_version_range = affected_version_range.replace("nginx/Windows", "")
Expand All @@ -87,7 +91,7 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
affected_version_range = NginxVersionRange.from_native(affected_version_range)

affected_packages = []
_, _, fixed_versions = ngnx_adv.not_vulnerable.partition(":")
_, _, fixed_versions = nginx_adv.not_vulnerable.partition(":")

for fixed_version in fixed_versions.split(","):
fixed_version = fixed_version.rstrip("+")
Expand All @@ -112,17 +116,17 @@ def to_advisory_data(ngnx_adv: NginxAdvisory) -> AdvisoryData:
)

return AdvisoryData(
aliases=ngnx_adv.aliases,
summary=ngnx_adv.summary,
aliases=nginx_adv.aliases,
summary=nginx_adv.summary,
affected_packages=affected_packages,
references=ngnx_adv.references,
references=nginx_adv.references,
url="https://nginx.org/en/security_advisories.html",
)


def parse_advisory_data_from_paragraph(vuln_info):
def parse_advisory_data_from_paragraph(vulnerability_info):
"""
Return an NginxAdvisory from a ``vuln_info`` bs4 paragraph.
Return an NginxAdvisory from a ``vulnerability_info`` bs4 paragraph.

An advisory paragraph, without html markup, looks like this:

Expand All @@ -145,7 +149,7 @@ def parse_advisory_data_from_paragraph(vuln_info):

# we iterate on the children to accumulate values in variables
# FIXME: using an explicit xpath-like query could be simpler
for child in vuln_info.children:
for child in vulnerability_info.children:
if is_first:
summary = child
is_first = False
Expand Down
2 changes: 0 additions & 2 deletions vulnerabilities/pipelines/pypa_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline
from vulnerabilities.utils import get_advisory_url

module_logger = logging.getLogger(__name__)


class PyPaImporterPipeline(VulnerableCodeBaseImporterPipeline):
"""Collect advisories from PyPA GitHub repository."""
Expand Down
Loading
Loading